[CIR] Implement emitStoreThroughLValue for ExtVectorType (#194127)

Implement emitStoreThroughLValue for ExtVectorType 

Issue https://github.com/llvm/llvm-project/issues/192311
This commit is contained in:
Amr Hesham
2026-04-30 20:26:09 +02:00
committed by GitHub
parent 4d154f6ea5
commit 04ec6544d4
3 changed files with 156 additions and 2 deletions

View File

@@ -303,6 +303,92 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr,
/*forPointeeType=*/true, baseInfo);
}
// Store an RValue through an ext-vector element/swizzle LValue (e.g.
// "v.xy = ..."): load the whole underlying vector, merge in the new
// element(s), and store the result back — a read/modify/write.
void CIRGenFunction::emitStoreThroughExtVectorComponentLValue(RValue src,
LValue dst) {
// Bit width of a scalar type; for a CIR vector type, the width of its
// element type.
auto getScalarSizeInBits = [&](mlir::Type ty) -> unsigned {
mlir::Type scalarTy = mlir::isa<cir::VectorType>(ty)
? mlir::cast<cir::VectorType>(ty).getElementType()
: ty;
cir::CIRDataLayout dl = cgm.getDataLayout();
return dl.getTypeSizeInBits(scalarTy).getFixedValue();
};
mlir::Value srcVal = src.getValue();
Address dstAddr = dst.getExtVectorAddress();
// Storing a narrower source into wider destination elements is not
// implemented yet.
if (getScalarSizeInBits(dstAddr.getElementType()) >
getScalarSizeInBits(srcVal.getType())) {
cgm.errorNYI(
dst.getPointer().getLoc(),
"emitStoreThroughExtVectorComponentLValue: dstTySize > srcTysize");
return;
}
if (getLangOpts().HLSL) {
cgm.errorNYI(dst.getPointer().getLoc(),
"emitStoreThroughExtVectorComponentLValue: HLSL");
return;
}
// This access turns into a read/modify/write of the vector. Load the input
// value now.
mlir::Location loc = dst.getExtVectorPointer().getLoc();
mlir::ArrayAttr elts = dst.getExtVectorElts();
mlir::Value vec = builder.createLoad(loc, dstAddr, dst.isVolatile());
if (const auto *vecTy = dst.getType()->getAs<clang::VectorType>()) {
unsigned numSrcElts = vecTy->getNumElements();
unsigned numDstElts = cast<cir::VectorType>(vec.getType()).getSize();
if (numDstElts == numSrcElts) {
// If the source and destination have the same number of elements, the
// swizzle overwrites the whole destination: build a single shuffle of
// the source that scatters element i to accessed-field position i.
SmallVector<int64_t> mask(numDstElts);
for (unsigned i = 0; i != numDstElts; ++i)
mask[getAccessedFieldNo(i, elts)] = i;
vec = builder.createVecShuffle(loc, srcVal, mask);
} else if (numDstElts > numSrcElts) {
// Extend the source vector to the destination's length (padding with
// poison lanes), then shuffle it into the destination.
// FIXME: since we're shuffling with undef, can we just use the indices
// into that? This could be simpler.
SmallVector<int64_t> extMask(numDstElts, -1);
std::iota(extMask.begin(), extMask.begin() + numSrcElts, 0);
mlir::Value extSrcVal = builder.createVecShuffle(loc, srcVal, extMask);
// Build an identity mask that initially keeps every destination element.
SmallVector<int64_t> mask(numDstElts);
std::iota(mask.begin(), mask.begin() + numDstElts, 0);
// When the vector size is odd and .odd or .hi is used, the last element
// of the Elts constant array will be one past the size of the vector.
// Ignore the last element here, if it is greater than the mask size.
if ((unsigned)getAccessedFieldNo(numSrcElts - 1, elts) == mask.size())
numSrcElts--;
// Redirect the accessed positions to read from the extended source; in
// the two-vector shuffle below, the second operand's elements occupy
// indices >= numDstElts.
for (unsigned i = 0; i != numSrcElts; ++i)
mask[getAccessedFieldNo(i, elts)] = i + numDstElts;
vec = builder.createVecShuffle(loc, vec, extSrcVal, mask);
} else {
// We should never shorten the vector
llvm_unreachable("unexpected shorten vector length");
}
} else {
// If the Src is a scalar (not a vector), and the target is a vector it
// must be updating one element.
unsigned inIdx = getAccessedFieldNo(0, elts);
cir::ConstantOp elt = builder.getSInt64(inIdx, loc);
vec = cir::VecInsertOp::create(builder, loc, vec, srcVal, elt);
}
builder.createStore(loc, vec, dst.getExtVectorAddress(),
dst.isVolatileQualified());
}
void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
bool isInit) {
if (!dst.isSimple()) {
@@ -317,6 +403,9 @@ void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
return;
}
if (dst.isExtVectorElt())
return emitStoreThroughExtVectorComponentLValue(src, dst);
assert(dst.isBitField() && "Unknown LValue type");
emitStoreThroughBitfieldLValue(src, dst);
return;

View File

@@ -2070,9 +2070,11 @@ public:
bool isInit = false, bool isNontemporal = false);
void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit);
void emitStoreThroughExtVectorComponentLValue(RValue src, LValue dst);
/// Store the specified rvalue into the specified
/// lvalue, where both are guaranteed to the have the same type, and that type
/// is 'Ty'.
/// lvalue, where both are guaranteed to the have the same type, and that
/// type is 'Ty'.
void emitStoreThroughLValue(RValue src, LValue dst, bool isInit = false);
mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);

View File

@@ -339,3 +339,66 @@ void array_subscript_expr_with_element_expr_base() {
// OGCG: %[[VEC_MEMBER_EXPR:.*]] = getelementptr inbounds i32, ptr %[[A_ADDR]], i64 0
// OGCG: %[[VEC_ELEM_PTR:.*]] = getelementptr inbounds i32, ptr %[[VEC_MEMBER_EXPR]], i64 1
// OGCG: store i32 2, ptr %[[VEC_ELEM_PTR]], align 4
// Swizzle store where source and destination swizzles have the same element
// count: expect a single shuffle of the source scattered into the destination
// (the loaded destination value is fully overwritten).
// NOTE(review): vi4/vi2 are presumably ext_vector_type typedefs declared
// earlier in this test file — not visible in this chunk.
void store_src_dest_same_size() {
vi4 a;
vi2 b;
b.xy = a.xy;
}
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
// CIR: %[[SHUFFLE_A:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[SHUFFLE_A]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
// CIR: cir.store {{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
// LLVM: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
// LLVM: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
// OGCG: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
// OGCG: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
// Swizzle store where the destination vector is wider than the source (the
// numDstElts > numSrcElts path): expect the source extended with poison lanes
// via one shuffle, then merged into the loaded destination with a second
// two-operand shuffle. ".lo" selects the low half of the 4-element vector.
void store_src_dest_not_same_size() {
vi4 a;
vi2 b;
a.lo = b;
}
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
// CIR: %[[SHUFFLE_B:.*]] = cir.vec.shuffle(%[[TMP_B]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<4 x !s32i>
// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[SHUFFLE_B]] : !cir.vector<4 x !s32i>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s32i>
// CIR: cir.store {{.*}} %[[RESULT]], %[[A_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// LLVM: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
// LLVM: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// OGCG: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16