[CIR] Implement emitStoreThroughLValue for ExtVectorType (#194127)
Implement emitStoreThroughLValue for ExtVectorType. Issue: https://github.com/llvm/llvm-project/issues/192311
This commit is contained in:
@@ -303,6 +303,92 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr,
|
||||
/*forPointeeType=*/true, baseInfo);
|
||||
}
|
||||
|
||||
/// Store the rvalue \p src through the ext-vector-element lvalue \p dst
/// (e.g. `v.xy = ...`). This becomes a read/modify/write of the underlying
/// vector: load the whole vector, blend the new elements in with shuffles
/// (or a single vec.insert when the source is a scalar), and store it back.
void CIRGenFunction::emitStoreThroughExtVectorComponentLValue(RValue src,
                                                              LValue dst) {
  // Bit width of the scalar element of `ty`, looking through vector types.
  auto getScalarSizeInBits = [&](mlir::Type ty) -> unsigned {
    mlir::Type scalarTy = ty;
    if (auto vecTy = mlir::dyn_cast<cir::VectorType>(ty))
      scalarTy = vecTy.getElementType();
    cir::CIRDataLayout dl = cgm.getDataLayout();
    return dl.getTypeSizeInBits(scalarTy).getFixedValue();
  };

  mlir::Value srcVal = src.getValue();
  Address dstAddr = dst.getExtVectorAddress();
  if (getScalarSizeInBits(dstAddr.getElementType()) >
      getScalarSizeInBits(srcVal.getType())) {
    cgm.errorNYI(
        dst.getPointer().getLoc(),
        "emitStoreThroughExtVectorComponentLValue: dstTySize > srcTySize");
    return;
  }

  if (getLangOpts().HLSL) {
    cgm.errorNYI(dst.getPointer().getLoc(),
                 "emitStoreThroughExtVectorComponentLValue: HLSL");
    return;
  }

  // This access turns into a read/modify/write of the vector. Load the input
  // value now.
  mlir::Location loc = dst.getExtVectorPointer().getLoc();

  mlir::ArrayAttr elts = dst.getExtVectorElts();

  mlir::Value vec = builder.createLoad(loc, dstAddr, dst.isVolatile());
  if (const auto *vecTy = dst.getType()->getAs<clang::VectorType>()) {
    unsigned numSrcElts = vecTy->getNumElements();
    unsigned numDstElts = cast<cir::VectorType>(vec.getType()).getSize();
    if (numDstElts == numSrcElts) {
      // Use a shuffle when the src and destination have the same number of
      // elements, applying the inverse of the access mask so each source
      // element lands in the destination slot it is being stored to.
      SmallVector<int64_t> mask(numDstElts);
      for (unsigned i = 0; i != numDstElts; ++i)
        mask[getAccessedFieldNo(i, elts)] = i;

      vec = builder.createVecShuffle(loc, srcVal, mask);
    } else if (numDstElts > numSrcElts) {
      // Extend the source vector to the same length and then shuffle it
      // into the destination.
      // FIXME: since we're shuffling with undef, can we just use the indices
      // into that? This could be simpler.
      SmallVector<int64_t> extMask(numDstElts, -1);
      std::iota(extMask.begin(), extMask.begin() + numSrcElts, 0);

      mlir::Value extSrcVal = builder.createVecShuffle(loc, srcVal, extMask);

      // Build an identity mask over the destination vector.
      SmallVector<int64_t> mask(numDstElts);
      std::iota(mask.begin(), mask.end(), 0);

      // When the vector size is odd and .odd or .hi is used, the last element
      // of the Elts constant array will be one past the size of the vector.
      // Ignore the last element here, if it is greater than the mask size.
      if (static_cast<unsigned>(getAccessedFieldNo(numSrcElts - 1, elts)) ==
          mask.size())
        numSrcElts--;

      // Redirect the accessed slots to pull from the extended source vector.
      for (unsigned i = 0; i != numSrcElts; ++i)
        mask[getAccessedFieldNo(i, elts)] = i + numDstElts;

      vec = builder.createVecShuffle(loc, vec, extSrcVal, mask);
    } else {
      // We should never shorten the vector.
      llvm_unreachable("unexpected shorten vector length");
    }
  } else {
    // If the src is a scalar (not a vector), and the target is a vector, it
    // must be updating exactly one element.
    unsigned inIdx = getAccessedFieldNo(0, elts);
    cir::ConstantOp elt = builder.getSInt64(inIdx, loc);
    vec = cir::VecInsertOp::create(builder, loc, vec, srcVal, elt);
  }

  builder.createStore(loc, vec, dst.getExtVectorAddress(),
                      dst.isVolatileQualified());
}
|
||||
|
||||
void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
|
||||
bool isInit) {
|
||||
if (!dst.isSimple()) {
|
||||
@@ -317,6 +403,9 @@ void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
|
||||
return;
|
||||
}
|
||||
|
||||
if (dst.isExtVectorElt())
|
||||
return emitStoreThroughExtVectorComponentLValue(src, dst);
|
||||
|
||||
assert(dst.isBitField() && "Unknown LValue type");
|
||||
emitStoreThroughBitfieldLValue(src, dst);
|
||||
return;
|
||||
|
||||
@@ -2070,9 +2070,11 @@ public:
|
||||
bool isInit = false, bool isNontemporal = false);
|
||||
void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit);
|
||||
|
||||
void emitStoreThroughExtVectorComponentLValue(RValue src, LValue dst);
|
||||
|
||||
/// Store the specified rvalue into the specified
|
||||
/// lvalue, where both are guaranteed to have the same type, and that type
|
||||
/// is 'Ty'.
|
||||
/// lvalue, where both are guaranteed to have the same type, and that
|
||||
/// type is 'Ty'.
|
||||
void emitStoreThroughLValue(RValue src, LValue dst, bool isInit = false);
|
||||
|
||||
mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
|
||||
|
||||
@@ -339,3 +339,66 @@ void array_subscript_expr_with_element_expr_base() {
|
||||
// OGCG: %[[VEC_MEMBER_EXPR:.*]] = getelementptr inbounds i32, ptr %[[A_ADDR]], i64 0
|
||||
// OGCG: %[[VEC_ELEM_PTR:.*]] = getelementptr inbounds i32, ptr %[[VEC_MEMBER_EXPR]], i64 1
|
||||
// OGCG: store i32 2, ptr %[[VEC_ELEM_PTR]], align 4
|
||||
|
||||
// Store a two-element swizzle of a 4-wide vector into a two-element swizzle
// of a 2-wide vector: the source swizzle and the destination vector have the
// same element count, so the store is emitted as shuffles (see CIR/LLVM
// checks below) rather than per-element inserts.
void store_src_dest_same_size() {
  vi4 a;
  vi2 b;
  b.xy = a.xy;
}
|
||||
|
||||
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
|
||||
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
|
||||
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
|
||||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
|
||||
// CIR: %[[SHUFFLE_A:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
|
||||
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
|
||||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
|
||||
// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[SHUFFLE_A]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
|
||||
// CIR: cir.store {{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>
|
||||
|
||||
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
|
||||
// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
|
||||
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// LLVM: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// LLVM: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
|
||||
|
||||
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
|
||||
// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
|
||||
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// OGCG: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// OGCG: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
|
||||
|
||||
// Store a whole 2-wide vector into the low half (.lo) of a 4-wide vector:
// the source is narrower than the destination, so per the checks below it is
// first widened with a poison-padded shuffle and then blended into the
// destination vector.
void store_src_dest_not_same_size() {
  vi4 a;
  vi2 b;
  a.lo = b;
}
|
||||
|
||||
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
|
||||
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
|
||||
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
|
||||
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
|
||||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
|
||||
// CIR: %[[SHUFFLE_B:.*]] = cir.vec.shuffle(%[[TMP_B]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<4 x !s32i>
|
||||
// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[SHUFFLE_B]] : !cir.vector<4 x !s32i>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s32i>
|
||||
// CIR: cir.store {{.*}} %[[RESULT]], %[[A_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
|
||||
|
||||
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
|
||||
// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
|
||||
// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
// LLVM: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
// LLVM: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
|
||||
|
||||
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
|
||||
// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
|
||||
// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
// OGCG: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
|
||||
|
||||
Reference in New Issue
Block a user