From issues #99105, #99076, #99090, and #99106; adds the implementation of DeviceMemoryBarrier(WithGroupSync) and AllMemoryBarrier(WithGroupSync) for DXIL and SPIR-V.
//===------- CGHLSLBuiltins.cpp - Emit LLVM Code for HLSL builtins --------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This contains code to emit HLSL Builtin calls as LLVM code.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "CGBuiltin.h"
|
|
#include "CGHLSLRuntime.h"
|
|
#include "CodeGenFunction.h"
|
|
#include "llvm/IR/MatrixBuilder.h"
|
|
|
|
using namespace clang;
|
|
using namespace CodeGen;
|
|
using namespace llvm;
|
|
|
|
// Emits code for the HLSL asdouble() builtin: reassembles a double (or a
// vector of doubles) from separate low/high 32-bit unsigned integer words.
static Value *handleAsDoubleBuiltin(CodeGenFunction &CGF, const CallExpr *E) {
  assert((E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
          E->getArg(1)->getType()->hasUnsignedIntegerRepresentation()) &&
         "asdouble operands types mismatch");
  Value *OpLowBits = CGF.EmitScalarExpr(E->getArg(0));
  Value *OpHighBits = CGF.EmitScalarExpr(E->getArg(1));

  llvm::Type *ResultType = CGF.DoubleTy;
  int N = 1;
  if (auto *VTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
    N = VTy->getNumElements();
    ResultType = llvm::FixedVectorType::get(CGF.DoubleTy, N);
  }

  // DXIL has a dedicated intrinsic for this operation.
  if (CGF.CGM.getTarget().getTriple().isDXIL())
    return CGF.Builder.CreateIntrinsic(
        /*ReturnType=*/ResultType, Intrinsic::dx_asdouble,
        {OpLowBits, OpHighBits}, nullptr, "hlsl.asdouble");

  // Other targets: interleave the low/high words and bitcast to double(s).
  // Promote scalars to one-element vectors so the shuffle below works
  // uniformly.
  if (!E->getArg(0)->getType()->isVectorType()) {
    OpLowBits = CGF.Builder.CreateVectorSplat(1, OpLowBits);
    OpHighBits = CGF.Builder.CreateVectorSplat(1, OpHighBits);
  }

  // Interleaving mask: low0, high0, low1, high1, ...
  llvm::SmallVector<int> Mask;
  for (int i = 0; i < N; i++) {
    Mask.push_back(i);
    Mask.push_back(i + N);
  }

  Value *BitVec = CGF.Builder.CreateShuffleVector(OpLowBits, OpHighBits, Mask);

  return CGF.Builder.CreateBitCast(BitVec, ResultType);
}
|
|
|
|
// Emits code for the HLSL clip() builtin: discards the current pixel when any
// component of the argument is less than zero.
static Value *handleHlslClip(const CallExpr *E, CodeGenFunction *CGF) {
  Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));

  Constant *FZeroConst = ConstantFP::getZero(CGF->FloatTy);
  Value *CMP;
  Value *LastInstr;

  if (const auto *VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>()) {
    // Vector argument: the discard condition is "any lane < 0", reduced to a
    // single i1 via the target's `any` intrinsic.
    FZeroConst = ConstantVector::getSplat(
        ElementCount::getFixed(VecTy->getNumElements()), FZeroConst);
    auto *FCompInst = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
    CMP = CGF->Builder.CreateIntrinsic(
        CGF->Builder.getInt1Ty(), CGF->CGM.getHLSLRuntime().getAnyIntrinsic(),
        {FCompInst});
  } else {
    CMP = CGF->Builder.CreateFCmpOLT(Op0, FZeroConst);
  }

  if (CGF->CGM.getTarget().getTriple().isDXIL()) {
    // DXIL: the discard intrinsic takes the predicate directly.
    LastInstr = CGF->Builder.CreateIntrinsic(Intrinsic::dx_discard, {CMP});
  } else if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
    // SPIR-V: spv_discard takes no condition, so emit an explicit branch
    // around an unconditional discard.
    BasicBlock *LT0 = CGF->createBasicBlock("lt0", CGF->CurFn);
    BasicBlock *End = CGF->createBasicBlock("end", CGF->CurFn);

    CGF->Builder.CreateCondBr(CMP, LT0, End);

    CGF->Builder.SetInsertPoint(LT0);

    CGF->Builder.CreateIntrinsic(Intrinsic::spv_discard, {});

    LastInstr = CGF->Builder.CreateBr(End);
    // Continue emitting subsequent code in the join block.
    CGF->Builder.SetInsertPoint(End);
  } else {
    llvm_unreachable("Backend Codegen not supported.");
  }

  return LastInstr;
}
|
|
|
|
// Emits code for the HLSL splitdouble() builtin: splits a double (or vector
// of doubles) into its low and high 32-bit unsigned words, written through
// the two out-parameters (args 1 and 2).
static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
  Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
  const auto *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
  const auto *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));

  // Materialize temporaries for the out-parameters; results are copied back
  // to the caller's lvalues by EmitWritebacks below.
  CallArgList Args;
  LValue Op1TmpLValue =
      CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
  LValue Op2TmpLValue =
      CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());

  if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
    Args.reverseWritebacks();

  Value *LowBits = nullptr;
  Value *HighBits = nullptr;

  if (CGF->CGM.getTarget().getTriple().isDXIL()) {
    // DXIL: a single intrinsic returns both words as a two-member struct.
    llvm::Type *RetElementTy = CGF->Int32Ty;
    if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
      RetElementTy = llvm::VectorType::get(
          CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
    auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);

    CallInst *CI = CGF->Builder.CreateIntrinsic(
        RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");

    LowBits = CGF->Builder.CreateExtractValue(CI, 0);
    HighBits = CGF->Builder.CreateExtractValue(CI, 1);
  } else {
    // For Non DXIL targets we generate the instructions.

    if (!Op0->getType()->isVectorTy()) {
      // Scalar double: bitcast to <2 x i32> and extract the two words.
      FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
      Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);

      LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
      HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
    } else {
      int NumElements = 1;
      if (const auto *VecTy =
              E->getArg(0)->getType()->getAs<clang::VectorType>())
        NumElements = VecTy->getNumElements();

      // Bitcast <N x double> to <2N x i32>: even lanes hold low words, odd
      // lanes hold high words.
      FixedVectorType *Uint32VecTy =
          FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
      Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
      if (NumElements == 1) {
        LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
        HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
      } else {
        // Deinterleave the words with shuffles into low/high vectors.
        SmallVector<int> EvenMask, OddMask;
        for (int I = 0, E = NumElements; I != E; ++I) {
          EvenMask.push_back(I * 2);
          OddMask.push_back(I * 2 + 1);
        }
        LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
        HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
      }
    }
  }
  CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
  auto *LastInst =
      CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
  // Copy the temporaries back to the caller's out-parameters.
  CGF->EmitWritebacks(Args);
  return LastInst;
}
|
|
|
|
// Emits code for the HLSL WaveActiveBallot() builtin: returns a <4 x i32>
// bitmask of the active lanes for which the condition is true.
static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
                                         const CallExpr *E) {
  Value *Cond = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Type *I32 = CGF.Int32Ty;

  llvm::Type *Vec4I32 = llvm::FixedVectorType::get(I32, 4);
  // Only referenced by the assert below, hence [[maybe_unused]].
  [[maybe_unused]] llvm::StructType *Struct4I32 =
      llvm::StructType::get(CGF.getLLVMContext(), {I32, I32, I32, I32});

  if (CGF.CGM.getTarget().getTriple().isDXIL()) {
    // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
    llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32});

    Value *StructVal = CGF.EmitRuntimeCall(Fn, Cond);
    assert(StructVal->getType() == Struct4I32 &&
           "dx.wave.ballot must return {i32,i32,i32,i32}");

    // Reassemble struct to <4 x i32>
    llvm::Value *VecVal = llvm::PoisonValue::get(Vec4I32);
    for (unsigned I = 0; I < 4; ++I) {
      Value *Elt = CGF.Builder.CreateExtractValue(StructVal, I);
      VecVal =
          CGF.Builder.CreateInsertElement(VecVal, Elt, CGF.Builder.getInt32(I));
    }

    return VecVal;
  }

  // SPIR-V's subgroup ballot intrinsic yields the result directly.
  if (CGF.CGM.getTarget().getTriple().isSPIRV())
    return CGF.EmitRuntimeCall(
        CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond);

  llvm_unreachable(
      "WaveActiveBallot is only supported for DXIL and SPIRV targets");
}
|
|
|
|
// Emits code for the HLSL f16tof32() builtin: converts the half stored in the
// low 16 bits of each uint element of the operand to a float.
static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
                                        const CallExpr *E) {
  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
  QualType Op0Ty = E->getArg(0)->getType();
  llvm::Type *ResType = CGF.FloatTy;
  // NumElements stays 0 for a scalar operand; used to pick the scalar vs
  // vector emission path below.
  uint64_t NumElements = 0;
  if (Op0->getType()->isVectorTy()) {
    NumElements =
        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
    ResType =
        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
  }
  if (!Op0Ty->hasUnsignedIntegerRepresentation())
    llvm_unreachable(
        "f16tof32 operand must have an unsigned int representation");

  if (CGF.CGM.getTriple().isDXIL())
    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
                                       ArrayRef<Value *>{Op0}, nullptr,
                                       "hlsl.f16tof32");

  if (CGF.CGM.getTriple().isSPIRV()) {
    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
    // Int16 and Float16 capabilities
    auto *UnpackType =
        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));

    if (NumElements == 0) {
      // a scalar input - simply extract the first element of the unpacked
      // vector
      Value *Unpack = CGF.Builder.CreateIntrinsic(
          UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
      return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
    }

    // a vector input - build a congruent output vector by iterating through
    // the input vector calling unpackhalf2x16 for each element
    Value *Result = PoisonValue::get(ResType);
    for (uint64_t I = 0; I < NumElements; I++) {
      Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
      Value *Unpack = CGF.Builder.CreateIntrinsic(
          UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
      // Only the low half of each unpacked pair is the converted value.
      Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
      Result = CGF.Builder.CreateInsertElement(Result, Res, I);
    }
    return Result;
  }

  llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
}
|
|
|
|
// Emits code for the HLSL f32tof16() builtin: converts each float element of
// the operand to a half stored in the low 16 bits of a uint.
static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
                                        const CallExpr *E) {
  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
  QualType Op0Ty = E->getArg(0)->getType();
  llvm::Type *ResType = CGF.IntTy;
  // NumElements stays 0 for a scalar operand; used to pick the scalar vs
  // vector emission path below.
  uint64_t NumElements = 0;
  if (Op0->getType()->isVectorTy()) {
    NumElements =
        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
    ResType =
        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
  }
  if (!Op0Ty->hasFloatingRepresentation())
    llvm_unreachable("f32tof16 operand must have a float representation");

  if (CGF.CGM.getTriple().isDXIL())
    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
                                       ArrayRef<Value *>{Op0}, nullptr,
                                       "hlsl.f32tof16");

  if (CGF.CGM.getTriple().isSPIRV()) {
    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
    // Int16 and Float16 capabilities
    auto *PackType =
        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));

    if (NumElements == 0) {
      // a scalar input - simply insert the scalar in the first element
      // of the 2 element float vector
      Value *Float2 = Constant::getNullValue(PackType);
      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
      Value *Result = CGF.Builder.CreateIntrinsic(
          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
      return Result;
    }

    // a vector input - build a congruent output vector by iterating through
    // the input vector calling packhalf2x16 for each element
    Value *Result = PoisonValue::get(ResType);
    for (uint64_t I = 0; I < NumElements; I++) {
      // Pack each element into the low half of a fresh zeroed pair.
      Value *Float2 = Constant::getNullValue(PackType);
      Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
      Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
      Value *Res = CGF.Builder.CreateIntrinsic(
          CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
      Result = CGF.Builder.CreateInsertElement(Result, Res, I);
    }
    return Result;
  }

  llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
}
|
|
|
|
// Stores the size (stride) of the buffer's element type into the Stride
// out-lvalue and returns the emitted store instruction.
static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                               LValue &Stride) {
  // The element stride comes from the type contained in the resource handle.
  const auto *ResTy =
      cast<HLSLAttributedResourceType>(HandleExpr->getType().getTypePtr());
  Value *ElemSize = CGF->getTypeSize(ResTy->getContainedType());
  return CGF->Builder.CreateStore(ElemSize, Stride.getAddress());
}
|
|
|
|
// Return dot product intrinsic that corresponds to the QT scalar type
|
|
// Return dot product intrinsic that corresponds to the QT scalar type.
static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
  if (QT->isFloatingType())
    return RT.getFDotIntrinsic();
  // Integer path: pick the signed or unsigned flavor.
  if (!QT->isSignedIntegerType()) {
    assert(QT->isUnsignedIntegerType());
    return RT.getUDotIntrinsic();
  }
  return RT.getSDotIntrinsic();
}
|
|
|
|
// Return the firstbithigh intrinsic matching the signedness of QT.
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
  if (!QT->hasSignedIntegerRepresentation()) {
    assert(QT->hasUnsignedIntegerRepresentation());
    return RT.getFirstBitUHighIntrinsic();
  }
  return RT.getFirstBitSHighIntrinsic();
}
|
|
|
|
// Return wave active sum that corresponds to the QT scalar type
|
|
// Return wave active sum that corresponds to the QT scalar type.
static Intrinsic::ID getWaveActiveSumIntrinsic(llvm::Triple::ArchType Arch,
                                               QualType QT) {
  if (Arch == llvm::Triple::spirv)
    return Intrinsic::spv_wave_reduce_sum;
  if (Arch == llvm::Triple::dxil)
    // DXIL distinguishes signed and unsigned reductions.
    return QT->isUnsignedIntegerType() ? Intrinsic::dx_wave_reduce_usum
                                       : Intrinsic::dx_wave_reduce_sum;
  llvm_unreachable("Intrinsic WaveActiveSum"
                   " not supported by target architecture");
}
|
|
|
|
// Return wave active product that corresponds to the QT scalar type
|
|
// Return wave active product that corresponds to the QT scalar type.
static Intrinsic::ID getWaveActiveProductIntrinsic(llvm::Triple::ArchType Arch,
                                                   QualType QT) {
  if (Arch == llvm::Triple::spirv)
    return Intrinsic::spv_wave_product;
  if (Arch == llvm::Triple::dxil)
    // DXIL distinguishes signed and unsigned products.
    return QT->isUnsignedIntegerType() ? Intrinsic::dx_wave_uproduct
                                       : Intrinsic::dx_wave_product;
  llvm_unreachable("Intrinsic WaveActiveProduct"
                   " not supported by target architecture");
}
|
|
|
|
// Return the target's WavePrefixCountBits intrinsic.
static Intrinsic::ID getPrefixCountBitsIntrinsic(llvm::Triple::ArchType Arch) {
  if (Arch == llvm::Triple::dxil)
    return Intrinsic::dx_wave_prefix_bit_count;
  if (Arch == llvm::Triple::spirv)
    return Intrinsic::spv_subgroup_prefix_bit_count;
  llvm_unreachable(
      "WavePrefixOp instruction not supported by target architecture");
}
|
|
|
|
// Return wave prefix sum that corresponds to the QT scalar type
|
|
// Return wave prefix sum that corresponds to the QT scalar type.
static Intrinsic::ID getWavePrefixSumIntrinsic(llvm::Triple::ArchType Arch,
                                               QualType QT) {
  if (Arch == llvm::Triple::spirv)
    return Intrinsic::spv_wave_prefix_sum;
  if (Arch == llvm::Triple::dxil)
    // DXIL distinguishes signed and unsigned prefix sums.
    return QT->isUnsignedIntegerType() ? Intrinsic::dx_wave_prefix_usum
                                       : Intrinsic::dx_wave_prefix_sum;
  llvm_unreachable("Intrinsic WavePrefixSum"
                   " not supported by target architecture");
}
|
|
|
|
// Return wave prefix product that corresponds to the QT scalar type
|
|
// Return wave prefix product that corresponds to the QT scalar type.
static Intrinsic::ID getWavePrefixProductIntrinsic(llvm::Triple::ArchType Arch,
                                                   QualType QT) {
  if (Arch == llvm::Triple::spirv)
    return Intrinsic::spv_wave_prefix_product;
  if (Arch == llvm::Triple::dxil)
    // DXIL distinguishes signed and unsigned prefix products.
    return QT->isUnsignedIntegerType() ? Intrinsic::dx_wave_prefix_uproduct
                                       : Intrinsic::dx_wave_prefix_product;
  llvm_unreachable("Intrinsic WavePrefixProduct"
                   " not supported by target architecture");
}
|
|
|
|
// Returns the mangled name for a builtin function that the SPIR-V backend
|
|
// will expand into a spec Constant.
|
|
// Returns the mangled name for a builtin function that the SPIR-V backend
// will expand into a spec Constant.
static std::string getSpecConstantFunctionName(clang::QualType SpecConstantType,
                                               ASTContext &Context) {
  // The parameter types for our conceptual intrinsic function.
  QualType ClangParamTypes[] = {Context.IntTy, SpecConstantType};

  // Create a temporary FunctionDecl for the builtin function. It won't be
  // added to the AST.
  FunctionProtoType::ExtProtoInfo EPI;
  QualType FnType =
      Context.getFunctionType(SpecConstantType, ClangParamTypes, EPI);
  DeclarationName FuncName = &Context.Idents.get("__spirv_SpecConstant");
  FunctionDecl *FnDeclForMangling = FunctionDecl::Create(
      Context, Context.getTranslationUnitDecl(), SourceLocation(),
      SourceLocation(), FuncName, FnType, /*TSI=*/nullptr, SC_Extern);

  // Attach the created parameter declarations to the function declaration.
  SmallVector<ParmVarDecl *, 2> ParamDecls;
  for (QualType ParamType : ClangParamTypes) {
    ParmVarDecl *PD = ParmVarDecl::Create(
        Context, FnDeclForMangling, SourceLocation(), SourceLocation(),
        /*IdentifierInfo*/ nullptr, ParamType, /*TSI*/ nullptr, SC_None,
        /*DefaultArg*/ nullptr);
    ParamDecls.push_back(PD);
  }
  FnDeclForMangling->setParams(ParamDecls);

  // Get the mangled name. raw_string_ostream writes straight into Name and is
  // unbuffered, so no explicit flush() is needed (flush() on it is a
  // deprecated no-op).
  std::string Name;
  llvm::raw_string_ostream MangledNameStream(Name);
  std::unique_ptr<MangleContext> Mangler(Context.createMangleContext());
  Mangler->mangleName(FnDeclForMangling, MangledNameStream);

  return Name;
}
|
|
|
|
// Returns the IR type for a sampling offset operand matching the shape of
// CoordTy: i32 for a scalar coordinate, <N x i32> for an N-wide coordinate.
static llvm::Type *getOffsetType(CodeGenModule &CGM, llvm::Type *CoordTy) {
  auto *CoordVecTy = dyn_cast<llvm::FixedVectorType>(CoordTy);
  if (!CoordVecTy)
    return CGM.Int32Ty;
  return llvm::FixedVectorType::get(CGM.Int32Ty, CoordVecTy->getNumElements());
}
|
|
|
|
// Emits the optional offset argument of a sampling builtin, or an all-zero
// constant of OffsetTy when the caller omitted it.
static Value *emitHlslOffset(CodeGenFunction &CGF, const CallExpr *E,
                             unsigned OffsetArgIndex, llvm::Type *OffsetTy) {
  if (OffsetArgIndex >= E->getNumArgs())
    return llvm::Constant::getNullValue(OffsetTy);
  return CGF.EmitScalarExpr(E->getArg(OffsetArgIndex));
}
|
|
|
|
// Emits the clamp argument of a sampling builtin as a 32-bit float.
static Value *emitHlslClamp(CodeGenFunction &CGF, const CallExpr *E,
                            unsigned ClampArgIndex) {
  Value *ClampVal = CGF.EmitScalarExpr(E->getArg(ClampArgIndex));
  llvm::Type *FloatTy = CGF.Builder.getFloatTy();
  // The builtin is variadic, so the clamp value may have been promoted to
  // double; the intrinsic requires a 32-bit float.
  return ClampVal->getType() == FloatTy
             ? ClampVal
             : CGF.Builder.CreateFPCast(ClampVal, FloatTy);
}
|
|
|
|
// Shared helper for the GetDimensions() family of builtins: calls the
// target's dimensions intrinsic and stores each returned component into the
// corresponding out-parameter of the call.
//   IntrinsicID - the dimensions intrinsic to call.
//   NumRetComps - number of components the intrinsic returns.
//   HasLod      - whether arg 1 is a mip-level input preceding the
//                 out-parameters.
// Returns the last store emitted.
static Value *emitGetDimensions(CodeGenFunction &CGF, const CallExpr *E,
                                unsigned IntrinsicID, unsigned NumRetComps,
                                bool HasLod) {
  Value *Handle = CGF.EmitScalarExpr(E->getArg(0));

  SmallVector<Value *> Args{Handle};
  if (HasLod)
    Args.push_back(CGF.EmitScalarExpr(E->getArg(1)));

  Value *DimValue =
      CGF.Builder.CreateIntrinsic(IntrinsicID, {Handle->getType()}, Args);

  Value *LastStore = nullptr;
  // Out-parameters start after the handle (and the LOD, if present).
  unsigned ArgIndex = HasLod ? 2 : 1;
  for (unsigned i = 0; i < NumRetComps; ++i) {
    const Expr *Arg = E->getArg(ArgIndex++);
    LValue DimOut = CGF.EmitLValue(Arg);
    Value *Elem = DimValue;
    // A multi-component result is a vector; pick out the i-th lane.
    if (NumRetComps > 1)
      Elem = CGF.Builder.CreateExtractElement(DimValue, i);

    // Handle float casting if needed
    if (Arg->getType()->isFloatingType())
      Elem = CGF.Builder.CreateUIToFP(
          Elem, llvm::Type::getFloatTy(CGF.getLLVMContext()));

    LastStore = CGF.Builder.CreateStore(Elem, DimOut.getAddress());
  }
  return LastStore;
}
|
|
|
|
Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
|
|
const CallExpr *E,
|
|
ReturnValueSlot ReturnValue) {
|
|
if (!getLangOpts().HLSL)
|
|
return nullptr;
|
|
|
|
switch (BuiltinID) {
|
|
case Builtin::BI__builtin_hlsl_adduint64: {
|
|
Value *OpA = EmitScalarExpr(E->getArg(0));
|
|
Value *OpB = EmitScalarExpr(E->getArg(1));
|
|
QualType Arg0Ty = E->getArg(0)->getType();
|
|
uint64_t NumElements = Arg0Ty->castAs<VectorType>()->getNumElements();
|
|
assert(Arg0Ty == E->getArg(1)->getType() &&
|
|
"AddUint64 operand types must match");
|
|
assert(Arg0Ty->hasIntegerRepresentation() &&
|
|
"AddUint64 operands must have an integer representation");
|
|
assert((NumElements == 2 || NumElements == 4) &&
|
|
"AddUint64 operands must have 2 or 4 elements");
|
|
|
|
llvm::Value *LowA;
|
|
llvm::Value *HighA;
|
|
llvm::Value *LowB;
|
|
llvm::Value *HighB;
|
|
|
|
// Obtain low and high words of inputs A and B
|
|
if (NumElements == 2) {
|
|
LowA = Builder.CreateExtractElement(OpA, (uint64_t)0, "LowA");
|
|
HighA = Builder.CreateExtractElement(OpA, (uint64_t)1, "HighA");
|
|
LowB = Builder.CreateExtractElement(OpB, (uint64_t)0, "LowB");
|
|
HighB = Builder.CreateExtractElement(OpB, (uint64_t)1, "HighB");
|
|
} else {
|
|
LowA = Builder.CreateShuffleVector(OpA, {0, 2}, "LowA");
|
|
HighA = Builder.CreateShuffleVector(OpA, {1, 3}, "HighA");
|
|
LowB = Builder.CreateShuffleVector(OpB, {0, 2}, "LowB");
|
|
HighB = Builder.CreateShuffleVector(OpB, {1, 3}, "HighB");
|
|
}
|
|
|
|
// Use an uadd_with_overflow to compute the sum of low words and obtain a
|
|
// carry value
|
|
llvm::Value *Carry;
|
|
llvm::Value *LowSum = EmitOverflowIntrinsic(
|
|
*this, Intrinsic::uadd_with_overflow, LowA, LowB, Carry);
|
|
llvm::Value *ZExtCarry =
|
|
Builder.CreateZExt(Carry, HighA->getType(), "CarryZExt");
|
|
|
|
// Sum the high words and the carry
|
|
llvm::Value *HighSum = Builder.CreateAdd(HighA, HighB, "HighSum");
|
|
llvm::Value *HighSumPlusCarry =
|
|
Builder.CreateAdd(HighSum, ZExtCarry, "HighSumPlusCarry");
|
|
|
|
if (NumElements == 4) {
|
|
return Builder.CreateShuffleVector(LowSum, HighSumPlusCarry, {0, 2, 1, 3},
|
|
"hlsl.AddUint64");
|
|
}
|
|
|
|
llvm::Value *Result = PoisonValue::get(OpA->getType());
|
|
Result = Builder.CreateInsertElement(Result, LowSum, (uint64_t)0,
|
|
"hlsl.AddUint64.upto0");
|
|
Result = Builder.CreateInsertElement(Result, HighSumPlusCarry, (uint64_t)1,
|
|
"hlsl.AddUint64");
|
|
return Result;
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_getpointer:
|
|
case Builtin::BI__builtin_hlsl_resource_getpointer_typed: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *IndexOp = EmitScalarExpr(E->getArg(1));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getCreateResourceGetPointerIntrinsic(),
|
|
ArrayRef<Value *>{HandleOp, IndexOp});
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_sample: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
|
|
SmallVector<Value *, 4> Args;
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 3, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
if (E->getNumArgs() <= 4) {
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleIntrinsic(), Args);
|
|
}
|
|
|
|
Args.push_back(emitHlslClamp(*this, E, 4));
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleClampIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_sample_bias: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
Value *BiasOp = EmitScalarExpr(E->getArg(3));
|
|
if (BiasOp->getType() != Builder.getFloatTy())
|
|
BiasOp = Builder.CreateFPCast(BiasOp, Builder.getFloatTy());
|
|
|
|
SmallVector<Value *, 6> Args; // Max 6 arguments for SampleBias
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(BiasOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
if (E->getNumArgs() <= 5)
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleBiasIntrinsic(), Args);
|
|
|
|
Args.push_back(emitHlslClamp(*this, E, 5));
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleBiasClampIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_sample_grad: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
Value *DDXOp = EmitScalarExpr(E->getArg(3));
|
|
Value *DDYOp = EmitScalarExpr(E->getArg(4));
|
|
|
|
SmallVector<Value *, 7> Args;
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(DDXOp);
|
|
Args.push_back(DDYOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 5, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
|
|
if (E->getNumArgs() <= 6) {
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleGradIntrinsic(), Args);
|
|
}
|
|
|
|
Args.push_back(emitHlslClamp(*this, E, 6));
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleGradClampIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_sample_level: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
Value *LODOp = EmitScalarExpr(E->getArg(3));
|
|
if (LODOp->getType() != Builder.getFloatTy())
|
|
LODOp = Builder.CreateFPCast(LODOp, Builder.getFloatTy());
|
|
|
|
SmallVector<Value *, 5> Args; // Max 5 arguments for SampleLevel
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(LODOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleLevelIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_load_level: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *CoordLODOp = EmitScalarExpr(E->getArg(1));
|
|
|
|
auto *CoordLODVecTy = cast<llvm::FixedVectorType>(CoordLODOp->getType());
|
|
unsigned NumElts = CoordLODVecTy->getNumElements();
|
|
assert(NumElts >= 2 && "CoordLOD must have at least 2 elements");
|
|
|
|
// Split CoordLOD into Coord and LOD
|
|
SmallVector<int, 4> Mask;
|
|
for (unsigned I = 0; I < NumElts - 1; ++I)
|
|
Mask.push_back(I);
|
|
|
|
Value *CoordOp =
|
|
Builder.CreateShuffleVector(CoordLODOp, Mask, "hlsl.load.coord");
|
|
Value *LODOp =
|
|
Builder.CreateExtractElement(CoordLODOp, NumElts - 1, "hlsl.load.lod");
|
|
|
|
SmallVector<Value *, 4> Args;
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(LODOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 2, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getLoadLevelIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_sample_cmp: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
Value *CmpOp = EmitScalarExpr(E->getArg(3));
|
|
if (CmpOp->getType() != Builder.getFloatTy())
|
|
CmpOp = Builder.CreateFPCast(CmpOp, Builder.getFloatTy());
|
|
|
|
SmallVector<Value *, 6> Args; // Max 6 arguments for SampleCmp
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(CmpOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
if (E->getNumArgs() <= 5) {
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleCmpIntrinsic(), Args);
|
|
}
|
|
|
|
Args.push_back(emitHlslClamp(*this, E, 5));
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleCmpClampIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_sample_cmp_level_zero: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
Value *CmpOp = EmitScalarExpr(E->getArg(3));
|
|
if (CmpOp->getType() != Builder.getFloatTy())
|
|
CmpOp = Builder.CreateFPCast(CmpOp, Builder.getFloatTy());
|
|
|
|
SmallVector<Value *, 5> Args;
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(CmpOp);
|
|
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getSampleCmpLevelZeroIntrinsic(), Args);
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_calculate_lod: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
|
|
return Builder.CreateIntrinsic(
|
|
ConvertType(E->getType()),
|
|
CGM.getHLSLRuntime().getCalculateLodIntrinsic(),
|
|
{HandleOp, SamplerOp, CoordOp});
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_calculate_lod_unclamped: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
|
|
return Builder.CreateIntrinsic(
|
|
ConvertType(E->getType()),
|
|
CGM.getHLSLRuntime().getCalculateLodUnclampedIntrinsic(),
|
|
{HandleOp, SamplerOp, CoordOp});
|
|
}
|
|
case Builtin::BI__builtin_hlsl_resource_gather: {
|
|
Value *HandleOp = EmitScalarExpr(E->getArg(0));
|
|
Value *SamplerOp = EmitScalarExpr(E->getArg(1));
|
|
Value *CoordOp = EmitScalarExpr(E->getArg(2));
|
|
Value *ComponentOp = EmitScalarExpr(E->getArg(3));
|
|
if (ComponentOp->getType() != Builder.getInt32Ty())
|
|
ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
|
|
/*isSigned=*/false);
|
|
|
|
SmallVector<Value *, 5> Args;
|
|
Args.push_back(HandleOp);
|
|
Args.push_back(SamplerOp);
|
|
Args.push_back(CoordOp);
|
|
Args.push_back(ComponentOp);
|
|
Args.push_back(
|
|
emitHlslOffset(*this, E, 4, getOffsetType(CGM, CoordOp->getType())));
|
|
|
|
llvm::Type *RetTy = ConvertType(E->getType());
|
|
return Builder.CreateIntrinsic(
|
|
RetTy, CGM.getHLSLRuntime().getGatherIntrinsic(), Args);
|
|
}
|
|
  // GatherCmp: comparison gather. Emits the target's gather-cmp intrinsic
  // with (handle, sampler, coord, compare[, component], offset).
  case Builtin::BI__builtin_hlsl_resource_gather_cmp: {
    Value *HandleOp = EmitScalarExpr(E->getArg(0));
    Value *SamplerOp = EmitScalarExpr(E->getArg(1));
    Value *CoordOp = EmitScalarExpr(E->getArg(2));
    Value *CompareOp = EmitScalarExpr(E->getArg(3));
    // The comparison reference must be a 32-bit float for the intrinsic.
    if (CompareOp->getType() != Builder.getFloatTy())
      CompareOp = Builder.CreateFPCast(CompareOp, Builder.getFloatTy());

    SmallVector<Value *, 6> Args;
    Args.push_back(HandleOp);
    Args.push_back(SamplerOp);
    Args.push_back(CoordOp);
    Args.push_back(CompareOp);

    // Only DXIL takes an explicit component selector (builtin arg 4),
    // normalized to an unsigned i32.
    if (CGM.getTarget().getTriple().isDXIL()) {
      Value *ComponentOp = EmitScalarExpr(E->getArg(4));
      if (ComponentOp->getType() != Builder.getInt32Ty())
        ComponentOp = Builder.CreateIntCast(ComponentOp, Builder.getInt32Ty(),
                                            /*isSigned=*/false);
      Args.push_back(ComponentOp);
    }

    // Texel offset from builtin arg 5; emitHlslOffset presumably supplies a
    // default when the argument is absent (helper defined elsewhere).
    Args.push_back(
        emitHlslOffset(*this, E, 5, getOffsetType(CGM, CoordOp->getType())));

    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateIntrinsic(
        RetTy, CGM.getHLSLRuntime().getGatherCmpIntrinsic(), Args);
  }
  // Load with a mapped-access status out-parameter (CheckAccessFullyMapped
  // support). Returns the loaded value and stores the i1 status, zext'd to
  // i32, through the lvalue of builtin arg 2.
  case Builtin::BI__builtin_hlsl_resource_load_with_status:
  case Builtin::BI__builtin_hlsl_resource_load_with_status_typed: {
    Value *HandleOp = EmitScalarExpr(E->getArg(0));
    Value *IndexOp = EmitScalarExpr(E->getArg(1));

    // Get the *address* of the status argument to write to it by reference
    LValue StatusLVal = EmitLValue(E->getArg(2));
    Address StatusAddr = StatusLVal.getAddress();

    QualType HandleTy = E->getArg(0)->getType();
    const HLSLAttributedResourceType *RT =
        HandleTy->getAs<HLSLAttributedResourceType>();
    assert(CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil &&
           "Only DXIL currently implements load with status");

    // Raw (byte-address/structured) buffers use the rawbuffer load; typed
    // buffers use the typedbuffer load.
    Intrinsic::ID IntrID = RT->getAttrs().RawBuffer
                               ? llvm::Intrinsic::dx_resource_load_rawbuffer
                               : llvm::Intrinsic::dx_resource_load_typedbuffer;

    // Both intrinsics return a {value, i1 status} struct.
    llvm::Type *DataTy = ConvertType(E->getType());
    llvm::Type *RetTy = llvm::StructType::get(Builder.getContext(),
                                              {DataTy, Builder.getInt1Ty()});

    SmallVector<Value *, 3> Args;
    Args.push_back(HandleOp);
    Args.push_back(IndexOp);

    if (RT->isRaw()) {
      Value *Offset = Builder.getInt32(0);
      // The offset parameter needs to be poison for ByteAddressBuffer
      if (!RT->isStructured())
        Offset = llvm::PoisonValue::get(Builder.getInt32Ty());
      Args.push_back(Offset);
    }

    // The load intrinsics give us a (T value, i1 status) pair -
    // shepherd these into the return value and out reference respectively.
    Value *ResRet =
        Builder.CreateIntrinsic(RetTy, IntrID, Args, {}, "ld.struct");
    Value *LoadedValue = Builder.CreateExtractValue(ResRet, {0}, "ld.value");
    Value *StatusBit = Builder.CreateExtractValue(ResRet, {1}, "ld.status");
    Value *ExtendedStatus =
        Builder.CreateZExt(StatusBit, Builder.getInt32Ty(), "ld.status.ext");
    Builder.CreateStore(ExtendedStatus, StatusAddr);

    return LoadedValue;
  }
|
|
  // An uninitialized resource handle is modeled as poison of the handle type.
  case Builtin::BI__builtin_hlsl_resource_uninitializedhandle: {
    llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
    return llvm::PoisonValue::get(HandleTy);
  }
  // Explicit register binding. Note the intrinsic argument order is
  // (space, register, range, index, name) while the builtin passes
  // register before space.
  case Builtin::BI__builtin_hlsl_resource_handlefrombinding: {
    llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
    Value *RegisterOp = EmitScalarExpr(E->getArg(1));
    Value *SpaceOp = EmitScalarExpr(E->getArg(2));
    Value *RangeOp = EmitScalarExpr(E->getArg(3));
    Value *IndexOp = EmitScalarExpr(E->getArg(4));
    Value *Name = EmitScalarExpr(E->getArg(5));
    llvm::Intrinsic::ID IntrinsicID =
        CGM.getHLSLRuntime().getCreateHandleFromBindingIntrinsic();
    SmallVector<Value *> Args{SpaceOp, RegisterOp, RangeOp, IndexOp, Name};
    return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
  }
  // Implicit binding: resources declared without an explicit register get
  // an order id that the backend later resolves to a concrete binding.
  case Builtin::BI__builtin_hlsl_resource_handlefromimplicitbinding: {
    llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
    Value *OrderID = EmitScalarExpr(E->getArg(1));
    Value *SpaceOp = EmitScalarExpr(E->getArg(2));
    Value *RangeOp = EmitScalarExpr(E->getArg(3));
    Value *IndexOp = EmitScalarExpr(E->getArg(4));
    Value *Name = EmitScalarExpr(E->getArg(5));
    llvm::Intrinsic::ID IntrinsicID =
        CGM.getHLSLRuntime().getCreateHandleFromImplicitBindingIntrinsic();
    SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
    return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
  }
  // UAV counter handle for an implicitly-bound resource. Only SPIR-V models
  // the counter as a separate handle; other targets reuse the main handle.
  case Builtin::BI__builtin_hlsl_resource_counterhandlefromimplicitbinding: {
    Value *MainHandle = EmitScalarExpr(E->getArg(0));
    if (!CGM.getTriple().isSPIRV())
      return MainHandle;

    llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
    Value *OrderID = EmitScalarExpr(E->getArg(1));
    Value *SpaceOp = EmitScalarExpr(E->getArg(2));
    llvm::Intrinsic::ID IntrinsicID =
        llvm::Intrinsic::spv_resource_counterhandlefromimplicitbinding;
    SmallVector<Value *> Args{MainHandle, OrderID, SpaceOp};
    return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
  }
  // NonUniformResourceIndex: wraps the index so the backend can mark the
  // resource access as potentially divergent across the wave.
  case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
    Value *IndexOp = EmitScalarExpr(E->getArg(0));
    llvm::Type *RetTy = ConvertType(E->getType());
    return Builder.CreateIntrinsic(
        RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(),
        ArrayRef<Value *>{IndexOp});
  }
|
|
  // GetDimensions variants: dispatch to a shared helper with the matching
  // target intrinsic, the number of out-parameters, and whether a LOD/levels
  // value participates.
  case Builtin::BI__builtin_hlsl_resource_getdimensions_x:
  case Builtin::BI__builtin_hlsl_resource_getdimensions_x_float:
    return emitGetDimensions(*this, E,
                             CGM.getHLSLRuntime().getGetDimensionsXIntrinsic(),
                             1, /*HasLod=*/false);
  case Builtin::BI__builtin_hlsl_resource_getdimensions_xy:
  case Builtin::BI__builtin_hlsl_resource_getdimensions_xy_float:
    return emitGetDimensions(*this, E,
                             CGM.getHLSLRuntime().getGetDimensionsXYIntrinsic(),
                             2, /*HasLod=*/false);
  case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy:
  case Builtin::BI__builtin_hlsl_resource_getdimensions_levels_xy_float:
    return emitGetDimensions(
        *this, E, CGM.getHLSLRuntime().getGetDimensionsLevelsXYIntrinsic(), 3,
        /*HasLod=*/true);
  // Structured-buffer GetDimensions stride: writes the element stride
  // through the out-parameter lvalue (builtin arg 1).
  case Builtin::BI__builtin_hlsl_resource_getstride: {
    LValue Stride = EmitLValue(E->getArg(1));
    return emitBufferStride(this, E->getArg(0), Stride);
  }
|
|
  // all(x): reduces the operand to a single i1 via the target intrinsic.
  case Builtin::BI__builtin_hlsl_all: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    return Builder.CreateIntrinsic(
        /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
        CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
        "hlsl.all");
  }
  // and(x, y): plain bitwise/logical AND on (vectors of) i1.
  case Builtin::BI__builtin_hlsl_and: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    return Builder.CreateAnd(Op0, Op1, "hlsl.and");
  }
  // or(x, y): plain bitwise/logical OR on (vectors of) i1.
  case Builtin::BI__builtin_hlsl_or: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    return Builder.CreateOr(Op0, Op1, "hlsl.or");
  }
  // any(x): reduces the operand to a single i1 via the target intrinsic.
  case Builtin::BI__builtin_hlsl_any: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    return Builder.CreateIntrinsic(
        /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
        CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,
        "hlsl.any");
  }
  // asdouble(lo, hi): handled by a shared helper (defined earlier in this
  // file) that combines two u32s into a double per target.
  case Builtin::BI__builtin_hlsl_asdouble:
    return handleAsDoubleBuiltin(*this, E);
|
|
  // clamp(x, min, max): pick the clamp intrinsic flavor from the scalar
  // (element) type of the first operand: float -> nclamp, unsigned ->
  // uclamp, signed -> sclamp.
  case Builtin::BI__builtin_hlsl_elementwise_clamp: {
    Value *OpX = EmitScalarExpr(E->getArg(0));
    Value *OpMin = EmitScalarExpr(E->getArg(1));
    Value *OpMax = EmitScalarExpr(E->getArg(2));

    QualType Ty = E->getArg(0)->getType();
    if (auto *VecTy = Ty->getAs<VectorType>())
      Ty = VecTy->getElementType();

    Intrinsic::ID Intr;
    if (Ty->isFloatingType()) {
      Intr = CGM.getHLSLRuntime().getNClampIntrinsic();
    } else if (Ty->isUnsignedIntegerType()) {
      Intr = CGM.getHLSLRuntime().getUClampIntrinsic();
    } else {
      assert(Ty->isSignedIntegerType());
      Intr = CGM.getHLSLRuntime().getSClampIntrinsic();
    }
    return Builder.CreateIntrinsic(
        /*ReturnType=*/OpX->getType(), Intr,
        ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "hlsl.clamp");
  }
  // cross(a, b): 3-element float vector cross product via target intrinsic.
  case Builtin::BI__builtin_hlsl_crossf16:
  case Builtin::BI__builtin_hlsl_crossf32: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
           E->getArg(1)->getType()->hasFloatingRepresentation() &&
           "cross operands must have a float representation");
    // make sure each vector has exactly 3 elements
    assert(
        E->getArg(0)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
        E->getArg(1)->getType()->castAs<VectorType>()->getNumElements() == 3 &&
        "input vectors must have 3 elements each");
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getCrossIntrinsic(),
        ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.cross");
  }
  // dot(a, b): scalar operands degenerate to a multiply; vector operands
  // use the element-type-specific dot intrinsic and return the scalar type.
  case Builtin::BI__builtin_hlsl_dot: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *T0 = Op0->getType();
    llvm::Type *T1 = Op1->getType();

    // If the arguments are scalars, just emit a multiply
    if (!T0->isVectorTy() && !T1->isVectorTy()) {
      if (T0->isFloatingPointTy())
        return Builder.CreateFMul(Op0, Op1, "hlsl.dot");

      if (T0->isIntegerTy())
        return Builder.CreateMul(Op0, Op1, "hlsl.dot");

      llvm_unreachable(
          "Scalar dot product is only supported on ints and floats.");
    }
    // For vectors, validate types and emit the appropriate intrinsic
    assert(CGM.getContext().hasSameUnqualifiedType(E->getArg(0)->getType(),
                                                   E->getArg(1)->getType()) &&
           "Dot product operands must have the same type.");

    auto *VecTy0 = E->getArg(0)->getType()->castAs<VectorType>();
    assert(VecTy0 && "Dot product argument must be a vector.");

    return Builder.CreateIntrinsic(
        /*ReturnType=*/T0->getScalarType(),
        getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
        ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
  }
  // dot4add_i8packed(x, y, acc): signed packed-i8 dot product accumulate.
  case Builtin::BI__builtin_hlsl_dot4add_i8packed: {
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Acc = EmitScalarExpr(E->getArg(2));

    Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddI8PackedIntrinsic();
    // Note that the argument order disagrees between the builtin and the
    // intrinsic here.
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
        nullptr, "hlsl.dot4add.i8packed");
  }
  // dot4add_u8packed(x, y, acc): unsigned packed-u8 dot product accumulate.
  case Builtin::BI__builtin_hlsl_dot4add_u8packed: {
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *Acc = EmitScalarExpr(E->getArg(2));

    Intrinsic::ID ID = CGM.getHLSLRuntime().getDot4AddU8PackedIntrinsic();
    // Note that the argument order disagrees between the builtin and the
    // intrinsic here.
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Acc->getType(), ID, ArrayRef<Value *>{Acc, X, Y},
        nullptr, "hlsl.dot4add.u8packed");
  }
|
|
  // firstbithigh(x): index of the highest set bit; the helper picks the
  // signed/unsigned intrinsic from the argument type.
  case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
    Value *X = EmitScalarExpr(E->getArg(0));

    return Builder.CreateIntrinsic(
        /*ReturnType=*/ConvertType(E->getType()),
        getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
        ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
  }
  // firstbitlow(x): index of the lowest set bit.
  case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
    Value *X = EmitScalarExpr(E->getArg(0));

    return Builder.CreateIntrinsic(
        /*ReturnType=*/ConvertType(E->getType()),
        CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
        nullptr, "hlsl.firstbitlow");
  }
  // lerp(x, y, s): linear interpolation; float-only by contract.
  case Builtin::BI__builtin_hlsl_lerp: {
    Value *X = EmitScalarExpr(E->getArg(0));
    Value *Y = EmitScalarExpr(E->getArg(1));
    Value *S = EmitScalarExpr(E->getArg(2));
    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
      llvm_unreachable("lerp operand must have a float representation");
    return Builder.CreateIntrinsic(
        /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
        ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
  }
  // normalize(x): unit-length vector via the target intrinsic.
  case Builtin::BI__builtin_hlsl_normalize: {
    Value *X = EmitScalarExpr(E->getArg(0));

    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
           "normalize operand must have a float representation");

    return Builder.CreateIntrinsic(
        /*ReturnType=*/X->getType(),
        CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
        nullptr, "hlsl.normalize");
  }
  // degrees(x): radians-to-degrees conversion.
  case Builtin::BI__builtin_hlsl_elementwise_degrees: {
    Value *X = EmitScalarExpr(E->getArg(0));

    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
           "degree operand must have a float representation");

    return Builder.CreateIntrinsic(
        /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
        ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
  }
  // f16tof32 / f32tof16: half <-> float bit conversions, delegated to
  // helpers defined earlier in this file.
  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
    return handleElementwiseF16ToF32(*this, E);
  }
  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
    return handleElementwiseF32ToF16(*this, E);
  }
  // frac(x): fractional part; float-only by contract.
  case Builtin::BI__builtin_hlsl_elementwise_frac: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
      llvm_unreachable("frac operand must have a float representation")
;
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
        ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
  }
|
|
  // isinf(x): elementwise test returning i1 (or a vector of i1 matching the
  // operand's element count).
  case Builtin::BI__builtin_hlsl_elementwise_isinf: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *Xty = Op0->getType();
    llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
    if (Xty->isVectorTy()) {
      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
      retType = llvm::VectorType::get(
          retType, ElementCount::getFixed(XVecTy->getNumElements()));
    }
    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
      llvm_unreachable("isinf operand must have a float representation");
    return Builder.CreateIntrinsic(
        retType, CGM.getHLSLRuntime().getIsInfIntrinsic(),
        ArrayRef<Value *>{Op0}, nullptr, "hlsl.isinf");
  }
  // isnan(x): same shape rules as isinf above.
  case Builtin::BI__builtin_hlsl_elementwise_isnan: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *Xty = Op0->getType();
    llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());
    if (Xty->isVectorTy()) {
      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
      retType = llvm::VectorType::get(
          retType, ElementCount::getFixed(XVecTy->getNumElements()));
    }
    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
      llvm_unreachable("isnan operand must have a float representation");
    return Builder.CreateIntrinsic(
        retType, CGM.getHLSLRuntime().getIsNaNIntrinsic(),
        ArrayRef<Value *>{Op0}, nullptr, "hlsl.isnan");
  }
  // mad(m, a, b) = m * a + b. Floats use fmuladd; integers use the DXIL
  // imad/umad intrinsics when targeting DXIL, otherwise an explicit
  // mul+add with the matching nsw/nuw wrap flags.
  case Builtin::BI__builtin_hlsl_mad: {
    Value *M = EmitScalarExpr(E->getArg(0));
    Value *A = EmitScalarExpr(E->getArg(1));
    Value *B = EmitScalarExpr(E->getArg(2));
    if (E->getArg(0)->getType()->hasFloatingRepresentation())
      return Builder.CreateIntrinsic(
          /*ReturnType*/ M->getType(), Intrinsic::fmuladd,
          ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");

    if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {
      if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
        return Builder.CreateIntrinsic(
            /*ReturnType*/ M->getType(), Intrinsic::dx_imad,
            ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");

      Value *Mul = Builder.CreateNSWMul(M, A);
      return Builder.CreateNSWAdd(Mul, B);
    }
    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
    if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)
      return Builder.CreateIntrinsic(
          /*ReturnType=*/M->getType(), Intrinsic::dx_umad,
          ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");

    Value *Mul = Builder.CreateNUWMul(M, A);
    return Builder.CreateNUWAdd(Mul, B);
  }
|
|
  // mul(): vector*matrix, matrix*vector, and matrix*matrix products via
  // the LLVM matrix intrinsics, with layout conversions as needed.
  case Builtin::BI__builtin_hlsl_mul: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    QualType QTy0 = E->getArg(0)->getType();
    QualType QTy1 = E->getArg(1)->getType();

    bool IsVec0 = QTy0->isVectorType();
    bool IsVec1 = QTy1->isVectorType();
    bool IsMat0 = QTy0->isConstantMatrixType();
    bool IsMat1 = QTy1->isConstantMatrixType();

    // The matrix multiply intrinsic only operates on column-major order
    // matrices. Therefore matrix memory layout transforms must be inserted
    // before and after matrix multiply intrinsics.
    bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
                      LangOptions::MatrixMemoryLayout::MatrixRowMajor;

    llvm::MatrixBuilder MB(Builder);
    // vector * matrix: treat the vector as a 1xN row vector.
    if (IsVec0 && IsMat1) {
      unsigned N = QTy0->castAs<VectorType>()->getNumElements();
      auto *MatTy = QTy1->castAs<ConstantMatrixType>();
      unsigned Rows = MatTy->getNumRows();
      unsigned Cols = MatTy->getNumColumns();
      assert(N == Rows && "vector length must match matrix row count");
      if (IsRowMajor)
        Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows, Cols);
      return MB.CreateMatrixMultiply(Op0, Op1, 1, N, Cols, "hlsl.mul");
    }
    // matrix * vector: treat the vector as a Cx1 column vector.
    if (IsMat0 && IsVec1) {
      auto *MatTy = QTy0->castAs<ConstantMatrixType>();
      unsigned Rows = MatTy->getNumRows();
      unsigned Cols = MatTy->getNumColumns();
      assert(QTy1->castAs<VectorType>()->getNumElements() == Cols &&
             "vector length must match matrix column count");
      if (IsRowMajor)
        Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows, Cols);
      return MB.CreateMatrixMultiply(Op0, Op1, Rows, Cols, 1, "hlsl.mul");
    }
    // matrix * matrix: convert both to column-major, multiply, then
    // convert the result back when the source layout is row-major.
    assert(IsMat0 && IsMat1);
    auto *MatTy0 = QTy0->castAs<ConstantMatrixType>();
    auto *MatTy1 = QTy1->castAs<ConstantMatrixType>();
    unsigned Rows0 = MatTy0->getNumRows();
    unsigned Rows1 = MatTy1->getNumRows();
    unsigned Cols0 = MatTy0->getNumColumns();
    unsigned Cols1 = MatTy1->getNumColumns();
    assert(Cols0 == Rows1 &&
           "inner matrix dimensions must match for multiplication");
    if (IsRowMajor) {
      Op0 = MB.CreateRowMajorToColumnMajorTransform(Op0, Rows0, Cols0);
      Op1 = MB.CreateRowMajorToColumnMajorTransform(Op1, Rows1, Cols1);
    }
    Value *Result =
        MB.CreateMatrixMultiply(Op0, Op1, Rows0, Cols0, Cols1, "hlsl.mul");
    if (IsRowMajor)
      Result = MB.CreateColumnMajorToRowMajorTransform(Result, Rows0, Cols1);
    return Result;
  }
  // transpose(m): matrix transpose via the LLVM matrix intrinsic.
  case Builtin::BI__builtin_hlsl_transpose: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    auto *MatTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
    unsigned Rows = MatTy->getNumRows();
    unsigned Cols = MatTy->getNumColumns();
    llvm::MatrixBuilder MB(Builder);
    // The matrix transpose intrinsic operates on column-major matrices.
    // For row-major, a row-major RxC matrix is equivalent to a column-major
    // CxR matrix, so transposing with swapped dimensions produces the correct
    // row-major CxR result directly.
    bool IsRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
                      LangOptions::MatrixMemoryLayout::MatrixRowMajor;
    if (IsRowMajor)
      return MB.CreateMatrixTranspose(Op0, Cols, Rows);
    return MB.CreateMatrixTranspose(Op0, Rows, Cols);
  }
|
|
  // rcp(x) = 1.0 / x, emitted as an fdiv by a (splatted) 1.0 constant.
  case Builtin::BI__builtin_hlsl_elementwise_rcp: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
      llvm_unreachable("rcp operand must have a float representation");
    llvm::Type *Ty = Op0->getType();
    llvm::Type *EltTy = Ty->getScalarType();
    // Splat 1.0 across the vector lanes when the operand is a vector.
    Constant *One = Ty->isVectorTy()
                        ? ConstantVector::getSplat(
                              ElementCount::getFixed(
                                  cast<FixedVectorType>(Ty)->getNumElements()),
                              ConstantFP::get(EltTy, 1.0))
                        : ConstantFP::get(EltTy, 1.0);
    return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
  }
  // rsqrt(x) = 1 / sqrt(x), via the target intrinsic.
  case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
      llvm_unreachable("rsqrt operand must have a float representation");
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
        ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
  }
  // saturate(x): clamp to [0, 1], via the target intrinsic.
  case Builtin::BI__builtin_hlsl_elementwise_saturate: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
           "saturate operand must have a float representation");
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Op0->getType(),
        CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
        nullptr, "hlsl.saturate");
  }
|
|
  // WavePrefixCountBits(b): count set bits among lower-indexed active lanes.
  case Builtin::BI__builtin_hlsl_wave_prefix_count_bits: {
    Value *Op = EmitScalarExpr(E->getArg(0));
    assert(Op->getType()->isIntegerTy(1) &&
           "WavePrefixBitCount operand must be a boolean type");

    Intrinsic::ID IID =
        getPrefixCountBitsIntrinsic(getTarget().getTriple().getArch());

    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), IID), ArrayRef{Op},
        "hlsl.wave.prefix.bit.count");
  }
  // select(cond, t, f): emitted as an LLVM select. Aggregate operands are
  // loaded to values first, scalar arms are splatted to match a vector
  // result, and aggregate results are stored through the sret slot.
  case Builtin::BI__builtin_hlsl_select: {
    Value *OpCond = EmitScalarExpr(E->getArg(0));
    RValue RValTrue = EmitAnyExpr(E->getArg(1));
    Value *OpTrue =
        RValTrue.isScalar()
            ? RValTrue.getScalarVal()
            : Builder.CreateLoad(RValTrue.getAggregateAddress(), "true_val");
    RValue RValFalse = EmitAnyExpr(E->getArg(2));
    Value *OpFalse =
        RValFalse.isScalar()
            ? RValFalse.getScalarVal()
            : Builder.CreateLoad(RValFalse.getAggregateAddress(), "false_val");
    if (auto *VTy = E->getType()->getAs<VectorType>()) {
      if (!OpTrue->getType()->isVectorTy())
        OpTrue =
            Builder.CreateVectorSplat(VTy->getNumElements(), OpTrue, "splat");
      if (!OpFalse->getType()->isVectorTy())
        OpFalse =
            Builder.CreateVectorSplat(VTy->getNumElements(), OpFalse, "splat");
    }

    Value *SelectVal =
        Builder.CreateSelect(OpCond, OpTrue, OpFalse, "hlsl.select");
    if (!RValTrue.isScalar())
      Builder.CreateStore(SelectVal, ReturnValue.getAddress(),
                          ReturnValue.isVolatile());

    return SelectVal;
  }
  // step(edge, x): 0 if x < edge else 1, via the target intrinsic.
  case Builtin::BI__builtin_hlsl_step: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
           E->getArg(1)->getType()->hasFloatingRepresentation() &&
           "step operands must have a float representation");
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getStepIntrinsic(),
        ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.step");
  }
|
|
  // WaveActiveAllEqual(x): true if x matches across all active lanes. The
  // intrinsic is overloaded on the operand type.
  case Builtin::BI__builtin_hlsl_wave_active_all_equal: {
    Value *Op = EmitScalarExpr(E->getArg(0));

    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllEqualIntrinsic();
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), ID, {Op->getType()}),
                           {Op});
  }
  // WaveActiveAllTrue(b): true if b is true in every active lane.
  case Builtin::BI__builtin_hlsl_wave_active_all_true: {
    Value *Op = EmitScalarExpr(E->getArg(0));
    assert(Op->getType()->isIntegerTy(1) &&
           "Intrinsic WaveActiveAllTrue operand must be a bool");

    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAllTrueIntrinsic();
    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
  }
  // WaveActiveAnyTrue(b): true if b is true in any active lane.
  case Builtin::BI__builtin_hlsl_wave_active_any_true: {
    Value *Op = EmitScalarExpr(E->getArg(0));
    assert(Op->getType()->isIntegerTy(1) &&
           "Intrinsic WaveActiveAnyTrue operand must be a bool");

    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveAnyTrueIntrinsic();
    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
  }
  // WaveActiveBitOr(x): OR-reduction across active lanes.
  case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
    Value *Op = EmitScalarExpr(E->getArg(0));
    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
           "Intrinsic WaveActiveBitOr operand must have an unsigned integer "
           "representation");

    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), ID, {Op->getType()}),
                           ArrayRef{Op}, "hlsl.wave.active.bit.or");
  }
  // WaveActiveBitXor(x): XOR-reduction across active lanes.
  case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
    Value *Op = EmitScalarExpr(E->getArg(0));
    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
           "Intrinsic WaveActiveBitXor operand must have an unsigned integer "
           "representation");

    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), ID, {Op->getType()}),
                           ArrayRef{Op}, "hlsl.wave.active.bit.xor");
  }
  // WaveActiveBitAnd(x): AND-reduction across active lanes.
  case Builtin::BI__builtin_hlsl_wave_active_bit_and: {
    Value *Op = EmitScalarExpr(E->getArg(0));
    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
           "Intrinsic WaveActiveBitAnd operand must have an unsigned integer "
           "representation");

    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitAndIntrinsic();
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), ID, {Op->getType()}),
                           ArrayRef{Op}, "hlsl.wave.active.bit.and");
  }
  // WaveActiveBallot(b): per-lane predicate packed into a lane mask.
  // Op is only evaluated here for the assert; the helper re-emits as needed.
  case Builtin::BI__builtin_hlsl_wave_active_ballot: {
    [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
    assert(Op->getType()->isIntegerTy(1) &&
           "Intrinsic WaveActiveBallot operand must be a bool");

    return handleHlslWaveActiveBallot(*this, E);
  }
  // WaveActiveCountBits(b): number of active lanes where b is true.
  case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveCountBitsIntrinsic();
    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID),
        ArrayRef{OpExpr});
  }
|
|
  // WaveActiveSum(x): sum-reduction across active lanes; helper selects the
  // intrinsic per target arch and operand type.
  case Builtin::BI__builtin_hlsl_wave_active_sum: {
    // Due to the use of variadic arguments, explicitly retrieve argument
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID IID = getWaveActiveSumIntrinsic(
        getTarget().getTriple().getArch(), E->getArg(0)->getType());

    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), IID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.wave.active.sum");
  }
  // WaveActiveProduct(x): product-reduction across active lanes.
  case Builtin::BI__builtin_hlsl_wave_active_product: {
    // Due to the use of variadic arguments, explicitly retrieve argument
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID IID = getWaveActiveProductIntrinsic(
        getTarget().getTriple().getArch(), E->getArg(0)->getType());

    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), IID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.wave.active.product");
  }
  // WaveActiveMax(x): max-reduction; unsigned types get the umax flavor.
  case Builtin::BI__builtin_hlsl_wave_active_max: {
    // Due to the use of variadic arguments, explicitly retrieve argument
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    QualType QT = E->getArg(0)->getType();
    Intrinsic::ID IID;
    if (QT->isUnsignedIntegerType())
      IID = CGM.getHLSLRuntime().getWaveActiveUMaxIntrinsic();
    else
      IID = CGM.getHLSLRuntime().getWaveActiveMaxIntrinsic();

    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), IID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.wave.active.max");
  }
  // WaveActiveMin(x): min-reduction; unsigned types get the umin flavor.
  case Builtin::BI__builtin_hlsl_wave_active_min: {
    // Due to the use of variadic arguments, explicitly retrieve argument
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    QualType QT = E->getArg(0)->getType();
    Intrinsic::ID IID;
    if (QT->isUnsignedIntegerType())
      IID = CGM.getHLSLRuntime().getWaveActiveUMinIntrinsic();
    else
      IID = CGM.getHLSLRuntime().getWaveActiveMinIntrinsic();

    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), IID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.wave.active.min");
  }
|
|
  // WaveGetLaneIndex(): DXIL has a dedicated intrinsic; SPIR-V lowers to a
  // named external function that maps onto the SPIR-V built-in.
  case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
    // We don't define a SPIR-V intrinsic, instead it is a SPIR-V built-in
    // defined in SPIRVBuiltins.td. So instead we manually get the matching name
    // for the DirectX intrinsic and the demangled builtin name
    switch (CGM.getTarget().getTriple().getArch()) {
    case llvm::Triple::dxil:
      return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
          &CGM.getModule(), Intrinsic::dx_wave_getlaneindex));
    case llvm::Triple::spirv:
      return EmitRuntimeCall(CGM.CreateRuntimeFunction(
          llvm::FunctionType::get(IntTy, {}, false),
          "__hlsl_wave_get_lane_index", {}, false, true));
    default:
      llvm_unreachable(
          "Intrinsic WaveGetLaneIndex not supported by target architecture");
    }
  }
  // WaveIsFirstLane(): true only in the lowest-indexed active lane.
  case Builtin::BI__builtin_hlsl_wave_is_first_lane: {
    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveIsFirstLaneIntrinsic();
    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
  }
  // WaveGetLaneCount(): number of lanes in the wave.
  case Builtin::BI__builtin_hlsl_wave_get_lane_count: {
    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveGetLaneCountIntrinsic();
    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
  }
  // WaveReadLaneAt(x, lane): broadcast x from the given lane.
  case Builtin::BI__builtin_hlsl_wave_read_lane_at: {
    // Due to the use of variadic arguments we must explicitly retrieve them and
    // create our function type.
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Value *OpIndex = EmitScalarExpr(E->getArg(1));
    return EmitRuntimeCall(
        Intrinsic::getOrInsertDeclaration(
            &CGM.getModule(), CGM.getHLSLRuntime().getWaveReadLaneAtIntrinsic(),
            {OpExpr->getType()}),
        ArrayRef{OpExpr, OpIndex}, "hlsl.wave.readlane");
  }
  // WavePrefixSum(x): exclusive prefix sum over lower-indexed active lanes.
  case Builtin::BI__builtin_hlsl_wave_prefix_sum: {
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID IID = getWavePrefixSumIntrinsic(
        getTarget().getTriple().getArch(), E->getArg(0)->getType());
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), IID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.wave.prefix.sum");
  }
  // WavePrefixProduct(x): exclusive prefix product over lower-indexed lanes.
  case Builtin::BI__builtin_hlsl_wave_prefix_product: {
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID IID = getWavePrefixProductIntrinsic(
        getTarget().getTriple().getArch(), E->getArg(0)->getType());
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), IID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.wave.prefix.product");
  }
|
|
  // QuadReadAcrossX(x): exchange the value with the horizontal quad neighbor.
  case Builtin::BI__builtin_hlsl_quad_read_across_x: {
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossXIntrinsic();
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), ID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.quad.read.across.x");
  }
  // QuadReadAcrossY(x): exchange the value with the vertical quad neighbor.
  case Builtin::BI__builtin_hlsl_quad_read_across_y: {
    Value *OpExpr = EmitScalarExpr(E->getArg(0));
    Intrinsic::ID ID = CGM.getHLSLRuntime().getQuadReadAcrossYIntrinsic();
    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
                               &CGM.getModule(), ID, {OpExpr->getType()}),
                           ArrayRef{OpExpr}, "hlsl.quad.read.across.y");
  }
|
|
  // sign(x): returns an i32 (or i32 vector) in {-1, 0, 1}. Unsigned inputs
  // can never be negative, so they reduce to a zero-compare select.
  case Builtin::BI__builtin_hlsl_elementwise_sign: {
    auto *Arg0 = E->getArg(0);
    Value *Op0 = EmitScalarExpr(Arg0);
    llvm::Type *Xty = Op0->getType();
    llvm::Type *retType = llvm::Type::getInt32Ty(this->getLLVMContext());
    if (Xty->isVectorTy()) {
      auto *XVecTy = Arg0->getType()->castAs<VectorType>();
      retType = llvm::VectorType::get(
          retType, ElementCount::getFixed(XVecTy->getNumElements()));
    }
    assert((Arg0->getType()->hasFloatingRepresentation() ||
            Arg0->getType()->hasIntegerRepresentation()) &&
           "sign operand must have a float or int representation");

    if (Arg0->getType()->hasUnsignedIntegerRepresentation()) {
      Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::get(Xty, 0));
      return Builder.CreateSelect(Cmp, ConstantInt::get(retType, 0),
                                  ConstantInt::get(retType, 1), "hlsl.sign");
    }

    return Builder.CreateIntrinsic(
        retType, CGM.getHLSLRuntime().getSignIntrinsic(),
        ArrayRef<Value *>{Op0}, nullptr, "hlsl.sign");
  }
  // radians(x): degrees-to-radians conversion.
  case Builtin::BI__builtin_hlsl_elementwise_radians: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
           "radians operand must have a float representation");
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Op0->getType(),
        CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
        nullptr, "hlsl.radians");
  }
  // Buffer counter increment/decrement. The delta (builtin arg 1) is
  // truncated to i8 for the intrinsic; the pre-update value's type is the
  // return type.
  case Builtin::BI__builtin_hlsl_buffer_update_counter: {
    Value *ResHandle = EmitScalarExpr(E->getArg(0));
    Value *Offset = EmitScalarExpr(E->getArg(1));
    Value *OffsetI8 = Builder.CreateIntCast(Offset, Int8Ty, true);
    return Builder.CreateIntrinsic(
        /*ReturnType=*/Offset->getType(),
        CGM.getHLSLRuntime().getBufferUpdateCounterIntrinsic(),
        ArrayRef<Value *>{ResHandle, OffsetI8}, nullptr);
  }
  // splitdouble(d, lo, hi): split a double into two u32 out-params; the
  // heavy lifting is in a shared helper defined earlier in this file.
  case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {

    assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
            E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
            E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
           "asuint operands types mismatch");
    return handleHlslSplitdouble(E, this);
  }
|
|
case Builtin::BI__builtin_hlsl_elementwise_clip:
|
|
assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
|
|
"clip operands types mismatch");
|
|
return handleHlslClip(E, this);
|
|
case Builtin::BI__builtin_hlsl_all_memory_barrier: {
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic();
|
|
return EmitRuntimeCall(
|
|
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
|
|
}
|
|
case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: {
|
|
Intrinsic::ID ID =
|
|
CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic();
|
|
return EmitRuntimeCall(
|
|
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
|
|
}
|
|
case Builtin::BI__builtin_hlsl_device_memory_barrier: {
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic();
|
|
return EmitRuntimeCall(
|
|
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
|
|
}
|
|
case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: {
|
|
Intrinsic::ID ID =
|
|
CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic();
|
|
return EmitRuntimeCall(
|
|
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
|
|
}
|
|
case Builtin::BI__builtin_hlsl_group_memory_barrier: {
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic();
|
|
return EmitRuntimeCall(
|
|
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
|
|
}
|
|
case Builtin::BI__builtin_hlsl_group_memory_barrier_with_group_sync: {
|
|
Intrinsic::ID ID =
|
|
CGM.getHLSLRuntime().getGroupMemoryBarrierWithGroupSyncIntrinsic();
|
|
return EmitRuntimeCall(
|
|
Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID));
|
|
}
|
|
case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: {
|
|
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
|
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
|
|
llvm_unreachable("ddx_coarse operand must have a float representation");
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic();
|
|
return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
|
|
ArrayRef<Value *>{Op0}, nullptr,
|
|
"hlsl.ddx.coarse");
|
|
}
|
|
case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: {
|
|
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
|
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
|
|
llvm_unreachable("ddy_coarse operand must have a float representation");
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic();
|
|
return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
|
|
ArrayRef<Value *>{Op0}, nullptr,
|
|
"hlsl.ddy.coarse");
|
|
}
|
|
case Builtin::BI__builtin_hlsl_elementwise_ddx_fine: {
|
|
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
|
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
|
|
llvm_unreachable("ddx_fine operand must have a float representation");
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxFineIntrinsic();
|
|
return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
|
|
ArrayRef<Value *>{Op0}, nullptr,
|
|
"hlsl.ddx.fine");
|
|
}
|
|
case Builtin::BI__builtin_hlsl_elementwise_ddy_fine: {
|
|
Value *Op0 = EmitScalarExpr(E->getArg(0));
|
|
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
|
|
llvm_unreachable("ddy_fine operand must have a float representation");
|
|
Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyFineIntrinsic();
|
|
return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID,
|
|
ArrayRef<Value *>{Op0}, nullptr,
|
|
"hlsl.ddy.fine");
|
|
}
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_bool:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_short:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_ushort:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_int:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_uint:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_longlong:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_ulonglong:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_half:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_float:
|
|
case Builtin::BI__builtin_get_spirv_spec_constant_double: {
|
|
llvm::Function *SpecConstantFn = getSpecConstantFunction(E->getType());
|
|
llvm::Value *SpecId = EmitScalarExpr(E->getArg(0));
|
|
llvm::Value *DefaultVal = EmitScalarExpr(E->getArg(1));
|
|
llvm::Value *Args[] = {SpecId, DefaultVal};
|
|
return Builder.CreateCall(SpecConstantFn, Args);
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
llvm::Function *clang::CodeGen::CodeGenFunction::getSpecConstantFunction(
    const clang::QualType &SpecConstantType) {
  // Return the module-level declaration of the SPIR-V spec-constant accessor
  // for this type, creating it on first use. The mangled name encodes the
  // requested constant type, so each distinct type gets its own declaration.
  llvm::Module *Mod = &CGM.getModule();
  std::string Name =
      getSpecConstantFunctionName(SpecConstantType, getContext());

  if (llvm::Function *Existing = Mod->getFunction(Name))
    return Existing;

  // Not declared yet: declare `RetTy Name(int SpecId, RetTy DefaultVal)` with
  // external linkage; the backend resolves it to the actual spec constant.
  llvm::Type *RetTy = ConvertType(SpecConstantType);
  llvm::Type *Params[] = {ConvertType(getContext().IntTy), RetTy};
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(RetTy, Params, /*isVarArg=*/false);
  return llvm::Function::Create(FnTy, llvm::GlobalValue::ExternalLinkage, Name,
                                Mod);
}
|