//===- DXILResourceAccess.cpp - Resource access via load/store ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "DXILResourceAccess.h" #include "DirectX.h" #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/DXILResource.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/Frontend/HLSL/HLSLResource.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/User.h" #include "llvm/InitializePasses.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Transforms/Utils/ValueMapper.h" #define DEBUG_TYPE "dxil-resource-access" using namespace llvm; static void diagnoseNonUniqueResourceAccess(Instruction *I, ArrayRef Handles) { LLVMContext &Context = I->getContext(); std::string InstStr; raw_string_ostream InstOS(InstStr); I->print(InstOS); Context.diagnose( DiagnosticInfoGeneric("At resource access:" + Twine(InstStr), DS_Note)); for (auto *Handle : Handles) { std::string HandleStr; raw_string_ostream HandleOS(HandleStr); Handle->print(HandleOS); Context.diagnose(DiagnosticInfoGeneric( "Uses resource handle:" + Twine(HandleStr), DS_Note)); } Context.diagnose(DiagnosticInfoGeneric( "Resource access is not guaranteed to map to a unique global resource")); } static Value *traverseGEPOffsets(const DataLayout &DL, IRBuilder<> &Builder, Value *Ptr, uint64_t AccessSize) { Value *Offset = nullptr; while (Ptr) { if (auto *II = dyn_cast(Ptr)) { assert(II->getIntrinsicID() == Intrinsic::dx_resource_getpointer && "Resource access through unexpected intrinsic"); return Offset ? Offset : ConstantInt::get(Builder.getInt32Ty(), 0); } auto *GEP = dyn_cast(Ptr); assert(GEP && "Resource access through unexpected instruction"); unsigned NumIndices = GEP->getNumIndices(); uint64_t IndexScale = DL.getTypeAllocSize(GEP->getSourceElementType()); APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); Value *GEPOffset; if (GEP->accumulateConstantOffset(DL, ConstantOffset)) { // We have a constant offset (in bytes). GEPOffset = ConstantInt::get(DL.getIndexType(GEP->getType()), ConstantOffset); IndexScale = 1; } else if (NumIndices == 1) { // If we have a single index we're indexing into a top level array. This // generally only happens with cbuffers. GEPOffset = *GEP->idx_begin(); } else if (NumIndices == 2) { // If we have two indices, this should be an access through a pointer. auto *IndexIt = GEP->idx_begin(); assert(cast(IndexIt)->getZExtValue() == 0 && "GEP is not indexing through pointer"); GEPOffset = *(++IndexIt); } else llvm_unreachable("Unhandled GEP structure for resource access"); uint64_t ElemSize = AccessSize; if (!(IndexScale % ElemSize)) { // If our scale is an exact multiple of the access size, adjust the // scaling to avoid an unnecessary division. IndexScale /= ElemSize; ElemSize = 1; } if (IndexScale != 1) GEPOffset = Builder.CreateMul( GEPOffset, ConstantInt::get(Builder.getInt32Ty(), IndexScale)); if (ElemSize != 1) GEPOffset = Builder.CreateUDiv( GEPOffset, ConstantInt::get(Builder.getInt32Ty(), ElemSize)); Offset = Offset ? Builder.CreateAdd(Offset, GEPOffset) : GEPOffset; Ptr = GEP->getPointerOperand(); } llvm_unreachable("GEP of null pointer?"); } static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI, dxil::ResourceTypeInfo &RTI) { const DataLayout &DL = SI->getDataLayout(); IRBuilder<> Builder(SI); Type *ContainedType = RTI.getHandleTy()->getTypeParameter(0); Type *ScalarType = ContainedType->getScalarType(); Type *LoadType = StructType::get(ContainedType, Builder.getInt1Ty()); Value *V = SI->getValueOperand(); if (V->getType() == ContainedType) { // V is already the right type. assert(SI->getPointerOperand() == II && "Store of whole element has mismatched address to store to"); } else if (V->getType() == ScalarType) { // We're storing a scalar, so we need to load the current value and only // replace the relevant part. auto *Load = Builder.CreateIntrinsic( LoadType, Intrinsic::dx_resource_load_typedbuffer, {II->getOperand(0), II->getOperand(1)}); auto *Struct = Builder.CreateExtractValue(Load, {0}); uint64_t AccessSize = DL.getTypeSizeInBits(ScalarType) / 8; Value *Offset = traverseGEPOffsets(DL, Builder, SI->getPointerOperand(), AccessSize); V = Builder.CreateInsertElement(Struct, V, Offset); } else { llvm_unreachable("Store to typed resource has invalid type"); } auto *Inst = Builder.CreateIntrinsic( Builder.getVoidTy(), Intrinsic::dx_resource_store_typedbuffer, {II->getOperand(0), II->getOperand(1), V}); SI->replaceAllUsesWith(Inst); } static void emitRawStore(IRBuilder<> &Builder, Value *Buffer, Value *Index, Value *Offset, Value *V, dxil::ResourceTypeInfo &RTI) { // For raw buffer (ie, HLSL's ByteAddressBuffer), we need to fold the access // entirely into the index. if (!RTI.isStruct()) { auto *ConstantOffset = dyn_cast(Offset); if (!ConstantOffset || !ConstantOffset->isZero()) Index = Builder.CreateAdd(Index, Offset); Offset = llvm::PoisonValue::get(Builder.getInt32Ty()); } Builder.CreateIntrinsic(Builder.getVoidTy(), Intrinsic::dx_resource_store_rawbuffer, {Buffer, Index, Offset, V}); } static void createRawStores(IntrinsicInst *II, StoreInst *SI, dxil::ResourceTypeInfo &RTI) { const DataLayout &DL = SI->getDataLayout(); IRBuilder<> Builder(SI); Value *V = SI->getValueOperand(); assert(!V->getType()->isAggregateType() && "Resource store should be scalar or vector type"); Value *Index = II->getOperand(1); // The offset for the rawbuffer load and store ops is always in bytes. uint64_t AccessSize = 1; Value *Offset = traverseGEPOffsets(DL, Builder, SI->getPointerOperand(), AccessSize); auto *VT = dyn_cast(V->getType()); if (VT && VT->getNumElements() > 4) { // Split into stores of at most 4 elements. Type *EltTy = VT->getElementType(); Value *Stride = ConstantInt::get(Builder.getInt32Ty(), 4 * (DL.getTypeSizeInBits(EltTy) / 8)); SmallVector Indices; for (unsigned int I = 0, N = VT->getNumElements(); I < N; I += 4) { if (I > 0) Offset = Builder.CreateAdd(Offset, Stride); for (unsigned int J = I, E = std::min(N, J + 4); J < E; ++J) Indices.push_back(J); Value *Part = Builder.CreateShuffleVector(V, Indices); emitRawStore(Builder, II->getOperand(0), Index, Offset, Part, RTI); Indices.clear(); } } else emitRawStore(Builder, II->getOperand(0), Index, Offset, V, RTI); } static void createStoreIntrinsic(IntrinsicInst *II, StoreInst *SI, dxil::ResourceTypeInfo &RTI) { switch (RTI.getResourceKind()) { case dxil::ResourceKind::TypedBuffer: return createTypedBufferStore(II, SI, RTI); case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: return createRawStores(II, SI, RTI); case dxil::ResourceKind::Texture1D: case dxil::ResourceKind::Texture2D: case dxil::ResourceKind::Texture2DMS: case dxil::ResourceKind::Texture3D: case dxil::ResourceKind::TextureCube: case dxil::ResourceKind::Texture1DArray: case dxil::ResourceKind::Texture2DArray: case dxil::ResourceKind::Texture2DMSArray: case dxil::ResourceKind::TextureCubeArray: case dxil::ResourceKind::FeedbackTexture2D: case dxil::ResourceKind::FeedbackTexture2DArray: reportFatalUsageError("DXIL Store not implemented for texture resources"); return; case dxil::ResourceKind::CBuffer: case dxil::ResourceKind::Sampler: case dxil::ResourceKind::TBuffer: case dxil::ResourceKind::RTAccelerationStructure: case dxil::ResourceKind::Invalid: case dxil::ResourceKind::NumEntries: llvm_unreachable("Invalid resource kind for store"); } llvm_unreachable("Unhandled case in switch"); } static void createTypedBufferLoad(IntrinsicInst *II, LoadInst *LI, dxil::ResourceTypeInfo &RTI) { const DataLayout &DL = LI->getDataLayout(); IRBuilder<> Builder(LI); Type *ContainedType = RTI.getHandleTy()->getTypeParameter(0); Type *LoadType = StructType::get(ContainedType, Builder.getInt1Ty()); Value *V = Builder.CreateIntrinsic(LoadType, Intrinsic::dx_resource_load_typedbuffer, {II->getOperand(0), II->getOperand(1)}); V = Builder.CreateExtractValue(V, {0}); Type *ScalarType = ContainedType->getScalarType(); uint64_t AccessSize = DL.getTypeSizeInBits(ScalarType) / 8; Value *Offset = traverseGEPOffsets(DL, Builder, LI->getPointerOperand(), AccessSize); auto *ConstantOffset = dyn_cast(Offset); if (!ConstantOffset || !ConstantOffset->isZero()) V = Builder.CreateExtractElement(V, Offset); // If we loaded a <1 x ...> instead of a scalar (presumably to feed a // shufflevector), then make sure we're maintaining the resulting type. if (auto *VT = dyn_cast(LI->getType())) if (VT->getNumElements() == 1 && !isa(V->getType())) V = Builder.CreateInsertElement(PoisonValue::get(VT), V, Builder.getInt32(0)); LI->replaceAllUsesWith(V); } static void createTextureLoad(IntrinsicInst *II, LoadInst *LI, dxil::ResourceTypeInfo &RTI) { const DataLayout &DL = LI->getDataLayout(); IRBuilder<> Builder(LI); Type *ContainedType = RTI.getHandleTy()->getTypeParameter(0); Value *Handle = II->getOperand(0); Value *Coords = II->getOperand(1); // For operator[], mip level is 0. Value *MipLevel = Builder.getInt32(0); // For operator[], offsets are zero. Type *CoordTy = Coords->getType(); Type *OffsetTy; if (auto *VecTy = dyn_cast(CoordTy)) OffsetTy = FixedVectorType::get(Builder.getInt32Ty(), VecTy->getNumElements()); else OffsetTy = Builder.getInt32Ty(); Value *Offsets = Constant::getNullValue(OffsetTy); Value *V = Builder.CreateIntrinsic(ContainedType, Intrinsic::dx_resource_load_level, {Handle, Coords, MipLevel, Offsets}); Type *ScalarType = ContainedType->getScalarType(); uint64_t AccessSize = DL.getTypeSizeInBits(ScalarType) / 8; Value *Offset = traverseGEPOffsets(DL, Builder, LI->getPointerOperand(), AccessSize); auto *ConstantOffset = dyn_cast(Offset); if (!ConstantOffset || !ConstantOffset->isZero()) V = Builder.CreateExtractElement(V, Offset); // If we loaded a <1 x ...> instead of a scalar (presumably to feed a // shufflevector), then make sure we're maintaining the resulting type. if (auto *VT = dyn_cast(LI->getType())) if (VT->getNumElements() == 1 && !isa(V->getType())) V = Builder.CreateInsertElement(PoisonValue::get(VT), V, Builder.getInt32(0)); LI->replaceAllUsesWith(V); } static Value *emitRawLoad(IRBuilder<> &Builder, Type *Ty, Value *Buffer, Value *Index, Value *Offset, dxil::ResourceTypeInfo &RTI) { // For raw buffer (ie, HLSL's ByteAddressBuffer), we need to fold the access // entirely into the index. if (!RTI.isStruct()) { auto *ConstantOffset = dyn_cast(Offset); if (!ConstantOffset || !ConstantOffset->isZero()) Index = Builder.CreateAdd(Index, Offset); Offset = llvm::PoisonValue::get(Builder.getInt32Ty()); } // The load intrinsic includes the bit for CheckAccessFullyMapped, so we need // to add that to the return type. Type *TypeWithCheck = StructType::get(Ty, Builder.getInt1Ty()); Value *V = Builder.CreateIntrinsic(TypeWithCheck, Intrinsic::dx_resource_load_rawbuffer, {Buffer, Index, Offset}); return Builder.CreateExtractValue(V, {0}); } static void createRawLoads(IntrinsicInst *II, LoadInst *LI, dxil::ResourceTypeInfo &RTI) { const DataLayout &DL = LI->getDataLayout(); IRBuilder<> Builder(LI); Value *Index = II->getOperand(1); // The offset for the rawbuffer load and store ops is always in bytes. uint64_t AccessSize = 1; Value *Offset = traverseGEPOffsets(DL, Builder, LI->getPointerOperand(), AccessSize); // TODO: We could make this handle aggregates by walking the structure and // handling each field individually, but we don't ever generate code that // would hit that so it seems superfluous. assert(!LI->getType()->isAggregateType() && "Resource load should be scalar or vector type"); Value *V; if (auto *VT = dyn_cast(LI->getType())) { // Split into loads of at most 4 elements. Type *EltTy = VT->getElementType(); Value *Stride = ConstantInt::get(Builder.getInt32Ty(), 4 * (DL.getTypeSizeInBits(EltTy) / 8)); SmallVector Parts; for (unsigned int I = 0, N = VT->getNumElements(); I < N; I += 4) { Type *Ty = FixedVectorType::get(EltTy, N - I < 4 ? N - I : 4); if (I > 0) Offset = Builder.CreateAdd(Offset, Stride); Parts.push_back( emitRawLoad(Builder, Ty, II->getOperand(0), Index, Offset, RTI)); } V = Parts.size() > 1 ? concatenateVectors(Builder, Parts) : Parts[0]; } else V = emitRawLoad(Builder, LI->getType(), II->getOperand(0), Index, Offset, RTI); LI->replaceAllUsesWith(V); } namespace { /// Helper for building a `load.cbufferrow` intrinsic given a simple type. struct CBufferRowIntrin { Intrinsic::ID IID; Type *RetTy; unsigned int EltSize; unsigned int NumElts; CBufferRowIntrin(const DataLayout &DL, Type *Ty) { assert(Ty == Ty->getScalarType() && "Expected scalar type"); switch (DL.getTypeSizeInBits(Ty)) { case 16: IID = Intrinsic::dx_resource_load_cbufferrow_8; RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); EltSize = 2; NumElts = 8; break; case 32: IID = Intrinsic::dx_resource_load_cbufferrow_4; RetTy = StructType::get(Ty, Ty, Ty, Ty); EltSize = 4; NumElts = 4; break; case 64: IID = Intrinsic::dx_resource_load_cbufferrow_2; RetTy = StructType::get(Ty, Ty); EltSize = 8; NumElts = 2; break; default: llvm_unreachable("Only 16, 32, and 64 bit types supported"); } } }; } // namespace static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, dxil::ResourceTypeInfo &RTI) { const DataLayout &DL = LI->getDataLayout(); Type *Ty = LI->getType(); assert(!isa(Ty) && "Structs not handled yet"); CBufferRowIntrin Intrin(DL, Ty->getScalarType()); StringRef Name = LI->getName(); Value *Handle = II->getOperand(0); IRBuilder<> Builder(LI); ConstantInt *GlobalOffset = dyn_cast(II->getOperand(1)); assert(GlobalOffset && "CBuffer getpointer index must be constant"); uint64_t GlobalOffsetVal = GlobalOffset->getZExtValue(); Value *CurrentRow = ConstantInt::get( Builder.getInt32Ty(), GlobalOffsetVal / hlsl::CBufferRowSizeInBytes); unsigned int CurrentIndex = (GlobalOffsetVal % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; // Every object in a cbuffer either fits in a row or is aligned to a row. This // means that only the very last pointer access can point into a row. auto *LastGEP = dyn_cast(LI->getPointerOperand()); if (!LastGEP) { // If we don't have a GEP at all we're just accessing the resource through // the result of getpointer directly. assert(LI->getPointerOperand() == II && "Unexpected indirect access to resource without GEP"); } else { Value *GEPOffset = traverseGEPOffsets( DL, Builder, LastGEP->getPointerOperand(), hlsl::CBufferRowSizeInBytes); CurrentRow = Builder.CreateAdd(GEPOffset, CurrentRow); APInt ConstantOffset(DL.getIndexTypeSizeInBits(LastGEP->getType()), 0); if (LastGEP->accumulateConstantOffset(DL, ConstantOffset)) { APInt Remainder(DL.getIndexTypeSizeInBits(LastGEP->getType()), hlsl::CBufferRowSizeInBytes); APInt::udivrem(ConstantOffset, Remainder, ConstantOffset, Remainder); CurrentRow = Builder.CreateAdd( CurrentRow, ConstantInt::get(Builder.getInt32Ty(), ConstantOffset)); CurrentIndex += Remainder.udiv(Intrin.EltSize).getZExtValue(); } else { assert(LastGEP->getNumIndices() == 1 && "Last GEP of cbuffer access is not array or struct access"); // We assume a non-constant access will be row-aligned. This is safe // because arrays and structs are always row aligned, and accesses to // vector elements will show up as a load of the vector followed by an // extractelement. CurrentRow = cast(CurrentRow)->isZero() ? *LastGEP->idx_begin() : Builder.CreateAdd(CurrentRow, *LastGEP->idx_begin()); CurrentIndex = 0; } } auto *CBufLoad = Builder.CreateIntrinsic( Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); // At this point we've loaded the first scalar of our result, but our original // type may have been a vector. unsigned int Remaining = ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; if (Remaining == 0) { // We only have a single element, so we're done. Value *Result = Elt; // However, if we loaded a <1 x T>, then we need to adjust the type. if (auto *VT = dyn_cast(Ty)) { assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, Builder.getInt32(0), Name); } LI->replaceAllUsesWith(Result); return; } // Walk each element and extract it, wrapping to new rows as needed. SmallVector Extracts{Elt}; while (Remaining--) { CurrentIndex %= Intrin.NumElts; if (CurrentIndex == 0) { CurrentRow = Builder.CreateAdd(CurrentRow, ConstantInt::get(Builder.getInt32Ty(), 1)); CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); } Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract")); } // Finally, we build up the original loaded value. Value *Result = PoisonValue::get(Ty); for (int I = 0, E = Extracts.size(); I < E; ++I) Result = Builder.CreateInsertElement( Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I)); LI->replaceAllUsesWith(Result); } static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, dxil::ResourceTypeInfo &RTI) { switch (RTI.getResourceKind()) { case dxil::ResourceKind::TypedBuffer: return createTypedBufferLoad(II, LI, RTI); case dxil::ResourceKind::RawBuffer: case dxil::ResourceKind::StructuredBuffer: return createRawLoads(II, LI, RTI); case dxil::ResourceKind::CBuffer: return createCBufferLoad(II, LI, RTI); case dxil::ResourceKind::Texture1D: case dxil::ResourceKind::Texture2D: case dxil::ResourceKind::Texture2DMS: case dxil::ResourceKind::Texture3D: case dxil::ResourceKind::TextureCube: case dxil::ResourceKind::Texture1DArray: case dxil::ResourceKind::Texture2DArray: case dxil::ResourceKind::Texture2DMSArray: case dxil::ResourceKind::TextureCubeArray: return createTextureLoad(II, LI, RTI); case dxil::ResourceKind::FeedbackTexture2D: case dxil::ResourceKind::FeedbackTexture2DArray: case dxil::ResourceKind::TBuffer: reportFatalUsageError("Load not yet implemented for resource type"); return; case dxil::ResourceKind::Sampler: case dxil::ResourceKind::RTAccelerationStructure: case dxil::ResourceKind::Invalid: case dxil::ResourceKind::NumEntries: llvm_unreachable("Invalid resource kind for load"); } llvm_unreachable("Unhandled case in switch"); } static Instruction *getStoreLoadPointerOperand(Instruction *AI) { if (auto *LI = dyn_cast(AI)) return dyn_cast(LI->getPointerOperand()); if (auto *SI = dyn_cast(AI)) return dyn_cast(SI->getPointerOperand()); return nullptr; } static const std::array HandleIntrins = { Intrinsic::dx_resource_handlefrombinding, Intrinsic::dx_resource_handlefromimplicitbinding, }; static SmallVector collectUsedHandles(Value *Ptr) { SmallVector Worklist = {Ptr}; SmallVector Handles; while (!Worklist.empty()) { Value *X = Worklist.pop_back_val(); if (!X->getType()->isPointerTy() && !X->getType()->isTargetExtTy()) return {}; // Early exit on store/load into non-resource if (auto *Phi = dyn_cast(X)) for (Use &V : Phi->incoming_values()) Worklist.push_back(V.get()); else if (auto *Select = dyn_cast(X)) for (Value *V : {Select->getTrueValue(), Select->getFalseValue()}) Worklist.push_back(V); else if (auto *II = dyn_cast(X)) { Intrinsic::ID IID = II->getIntrinsicID(); if (IID == Intrinsic::dx_resource_getpointer) Worklist.push_back(II->getArgOperand(/*Handle=*/0)); if (llvm::is_contained(HandleIntrins, IID)) Handles.push_back(II); } } return Handles; } static hlsl::Binding getHandleIntrinsicBinding(IntrinsicInst *Handle, DXILResourceTypeMap &DRTM) { assert(llvm::is_contained(HandleIntrins, Handle->getIntrinsicID()) && "Only expects a Handle as determined from collectUsedHandles."); auto *HandleTy = cast(Handle->getType()); dxil::ResourceClass Class = DRTM[HandleTy].getResourceClass(); uint32_t Space = cast(Handle->getArgOperand(0))->getZExtValue(); uint32_t LowerBound = cast(Handle->getArgOperand(1))->getZExtValue(); uint32_t Size = cast(Handle->getArgOperand(2))->getZExtValue(); uint32_t UpperBound = Size == UINT32_MAX ? UINT32_MAX : LowerBound + Size - 1; return hlsl::Binding(Class, Space, LowerBound, UpperBound, nullptr); } namespace { /// Helper for propagating the current handle and ptr indices. struct AccessIndices { Value *GetPtrIdx; Value *HandleIdx; bool hasGetPtrIdx() { return GetPtrIdx != nullptr; } bool hasHandleIdx() { return HandleIdx != nullptr; } }; } // namespace // getAccessIndices traverses up the control flow that a ptr came from and // propagates back the indicies used to access the resource (AccessIndices): // // - GetPtrIdx is the index of dx.resource.getpointer // - HandleIdx is the index of dx.resource.handlefrom.* static AccessIndices getAccessIndices(Instruction *I, SmallSetVector &DeadInsts) { if (auto *II = dyn_cast(I)) { if (llvm::is_contained(HandleIntrins, II->getIntrinsicID())) { DeadInsts.insert(II); return {nullptr, II->getArgOperand(/*Index=*/3)}; } if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) { auto *V = dyn_cast(II->getArgOperand(/*Handle=*/0)); auto AccessIdx = getAccessIndices(V, DeadInsts); assert(!AccessIdx.hasGetPtrIdx() && "Encountered multiple dx.resource.getpointers in ptr chain?"); AccessIdx.GetPtrIdx = II->getArgOperand(1); DeadInsts.insert(II); return AccessIdx; } } if (auto *Phi = dyn_cast(I)) { unsigned NumEdges = Phi->getNumIncomingValues(); assert(NumEdges != 0 && "Malformed Phi Node"); IRBuilder<> Builder(Phi); PHINode *GetPtrPhi = PHINode::Create(Builder.getInt32Ty(), NumEdges); PHINode *HandlePhi = PHINode::Create(Builder.getInt32Ty(), NumEdges); bool HasGetPtr = true; for (unsigned Idx = 0; Idx < NumEdges; Idx++) { auto *BB = Phi->getIncomingBlock(Idx); auto *V = dyn_cast(Phi->getIncomingValue(Idx)); auto AccessIdx = getAccessIndices(V, DeadInsts); HasGetPtr &= AccessIdx.hasGetPtrIdx(); if (HasGetPtr) GetPtrPhi->addIncoming(AccessIdx.GetPtrIdx, BB); HandlePhi->addIncoming(AccessIdx.HandleIdx, BB); } if (HasGetPtr) Builder.Insert(GetPtrPhi); else GetPtrPhi = nullptr; Builder.Insert(HandlePhi); DeadInsts.insert(Phi); return {GetPtrPhi, HandlePhi}; } if (auto *Select = dyn_cast(I)) { auto *TrueV = dyn_cast(Select->getTrueValue()); auto TrueAccessIdx = getAccessIndices(TrueV, DeadInsts); auto *FalseV = dyn_cast(Select->getFalseValue()); auto FalseAccessIdx = getAccessIndices(FalseV, DeadInsts); IRBuilder<> Builder(Select); Value *GetPtrSelect = nullptr; if (TrueAccessIdx.hasGetPtrIdx() && FalseAccessIdx.hasGetPtrIdx()) GetPtrSelect = Builder.CreateSelect(Select->getCondition(), TrueAccessIdx.GetPtrIdx, FalseAccessIdx.GetPtrIdx); auto *HandleSelect = Builder.CreateSelect(Select->getCondition(), TrueAccessIdx.HandleIdx, FalseAccessIdx.HandleIdx); DeadInsts.insert(Select); return {GetPtrSelect, HandleSelect}; } llvm_unreachable("collectUsedHandles should assure this does not occur"); } static void replaceHandleWithIndices(Instruction *Ptr, IntrinsicInst *OldHandle, SmallSetVector &DeadInsts) { auto AccessIdx = getAccessIndices(Ptr, DeadInsts); assert(AccessIdx.hasGetPtrIdx() && AccessIdx.hasHandleIdx() && "Couldn't retrieve indices. This is guaranteed by getAccessIndices"); IRBuilder<> Builder(Ptr); IntrinsicInst *Handle = cast(OldHandle->clone()); Handle->setArgOperand(/*Index=*/3, AccessIdx.HandleIdx); Builder.Insert(Handle); auto *GetPtr = Builder.CreateIntrinsic(Ptr->getType(), Intrinsic::dx_resource_getpointer, {Handle, AccessIdx.GetPtrIdx}); Ptr->replaceAllUsesWith(GetPtr); DeadInsts.insert(Ptr); } // Try to legalize dx.resource.handlefrom.*.binding and dx.resource.getpointer // calls with their respective index values and propagate the index values to // be used at resource access. // // If it can't be transformed to be legal then: // // Reports an error if a resource access is not guaranteed into a unique global // resource. // // Returns true if any changes are made. static bool legalizeResourceHandles(Function &F, DXILResourceTypeMap &DRTM) { SmallSetVector DeadInsts; for (BasicBlock &BB : make_early_inc_range(F)) { for (Instruction &I : BB) { if (auto *PtrOp = getStoreLoadPointerOperand(&I)) { SmallVector Handles = collectUsedHandles(PtrOp); unsigned NumHandles = Handles.size(); if (NumHandles <= 1) continue; // Legal, no-replacement required bool SameGlobalBinding = true; hlsl::Binding B = getHandleIntrinsicBinding(Handles[0], DRTM); for (unsigned Idx = 1; Idx < NumHandles; Idx++) SameGlobalBinding &= (B == getHandleIntrinsicBinding(Handles[Idx], DRTM)); if (!SameGlobalBinding) { diagnoseNonUniqueResourceAccess(&I, Handles); continue; } replaceHandleWithIndices(PtrOp, Handles[0], DeadInsts); } } } bool MadeChanges = false; for (auto *I : llvm::reverse(DeadInsts)) if (I->hasNUses(0)) { // Handle can still be used outside of replaced path I->eraseFromParent(); MadeChanges = true; } return MadeChanges; } static void replaceAccess(IntrinsicInst *II, dxil::ResourceTypeInfo &RTI) { SmallVector Worklist; for (User *U : II->users()) Worklist.push_back(U); SmallVector DeadInsts; while (!Worklist.empty()) { User *U = Worklist.back(); Worklist.pop_back(); if (auto *GEP = dyn_cast(U)) { for (User *U : GEP->users()) Worklist.push_back(U); DeadInsts.push_back(GEP); } else if (auto *SI = dyn_cast(U)) { assert(SI->getValueOperand() != II && "Pointer escaped!"); createStoreIntrinsic(II, SI, RTI); DeadInsts.push_back(SI); } else if (auto *LI = dyn_cast(U)) { createLoadIntrinsic(II, LI, RTI); DeadInsts.push_back(LI); } else llvm_unreachable("Unhandled instruction - pointer escaped?"); } // Traverse the now-dead instructions in RPO and remove them. for (Instruction *Dead : llvm::reverse(DeadInsts)) Dead->eraseFromParent(); II->eraseFromParent(); } static bool transformResourcePointers(Function &F, DXILResourceTypeMap &DRTM) { SmallVector> Resources; for (BasicBlock &BB : make_early_inc_range(F)) for (Instruction &I : BB) if (auto *II = dyn_cast(&I)) if (II->getIntrinsicID() == Intrinsic::dx_resource_getpointer) { auto *HandleTy = cast(II->getArgOperand(0)->getType()); Resources.emplace_back(II, DRTM[HandleTy]); } for (auto &[II, RI] : Resources) replaceAccess(II, RI); return !Resources.empty(); } PreservedAnalyses DXILResourceAccess::run(Function &F, FunctionAnalysisManager &FAM) { auto &MAMProxy = FAM.getResult(F); DXILResourceTypeMap *DRTM = MAMProxy.getCachedResult(*F.getParent()); assert(DRTM && "DXILResourceTypeAnalysis must be available"); bool MadeHandleChanges = legalizeResourceHandles(F, *DRTM); bool MadeResourceChanges = transformResourcePointers(F, *DRTM); if (!(MadeHandleChanges || MadeResourceChanges)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); PA.preserve(); return PA; } namespace { class DXILResourceAccessLegacy : public FunctionPass { public: bool runOnFunction(Function &F) override { DXILResourceTypeMap &DRTM = getAnalysis().getResourceTypeMap(); bool MadeHandleChanges = legalizeResourceHandles(F, DRTM); bool MadeResourceChanges = transformResourcePointers(F, DRTM); return MadeHandleChanges || MadeResourceChanges; } StringRef getPassName() const override { return "DXIL Resource Access"; } DXILResourceAccessLegacy() : FunctionPass(ID) {} static char ID; // Pass identification. void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); } }; char DXILResourceAccessLegacy::ID = 0; } // end anonymous namespace INITIALIZE_PASS_BEGIN(DXILResourceAccessLegacy, DEBUG_TYPE, "DXIL Resource Access", false, false) INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass) INITIALIZE_PASS_END(DXILResourceAccessLegacy, DEBUG_TYPE, "DXIL Resource Access", false, false) FunctionPass *llvm::createDXILResourceAccessLegacyPass() { return new DXILResourceAccessLegacy(); }