From 04ec6544d4e0cebfabc9a55c2e4ce4d4c023100f Mon Sep 17 00:00:00 2001
From: Amr Hesham
Date: Thu, 30 Apr 2026 20:26:09 +0200
Subject: [PATCH] [CIR] Implement emitStoreThroughLValue for ExtVectorType (#194127)

Implement emitStoreThroughLValue for ExtVectorType

Issue https://github.com/llvm/llvm-project/issues/192311
---
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          | 89 +++++++++++++++++++
 clang/lib/CIR/CodeGen/CIRGenFunction.h        |  6 +-
 clang/test/CIR/CodeGen/vector-ext-element.cpp | 63 +++++++++++++
 3 files changed, 156 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index ef1aacb9779f..2959dc567da0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -303,6 +303,92 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr,
                                  /*forPointeeType=*/true, baseInfo);
 }
 
+void CIRGenFunction::emitStoreThroughExtVectorComponentLValue(RValue src,
+                                                              LValue dst) {
+  auto getScalarSizeInBits = [&](mlir::Type ty) -> unsigned {
+    mlir::Type scalarTy = mlir::isa<cir::VectorType>(ty)
+                              ? mlir::cast<cir::VectorType>(ty).getElementType()
+                              : ty;
+    cir::CIRDataLayout dl = cgm.getDataLayout();
+    return dl.getTypeSizeInBits(scalarTy).getFixedValue();
+  };
+
+  mlir::Value srcVal = src.getValue();
+  Address dstAddr = dst.getExtVectorAddress();
+  if (getScalarSizeInBits(dstAddr.getElementType()) >
+      getScalarSizeInBits(srcVal.getType())) {
+    cgm.errorNYI(
+        dst.getPointer().getLoc(),
+        "emitStoreThroughExtVectorComponentLValue: dstTySize > srcTySize");
+    return;
+  }
+
+  if (getLangOpts().HLSL) {
+    cgm.errorNYI(dst.getPointer().getLoc(),
+                 "emitStoreThroughExtVectorComponentLValue: HLSL");
+    return;
+  }
+
+  // This access turns into a read/modify/write of the vector. Load the input
+  // value now.
+  mlir::Location loc = dst.getExtVectorPointer().getLoc();
+
+  mlir::ArrayAttr elts = dst.getExtVectorElts();
+
+  mlir::Value vec = builder.createLoad(loc, dstAddr, dst.isVolatile());
+  if (const auto *vecTy = dst.getType()->getAs<clang::VectorType>()) {
+    unsigned numSrcElts = vecTy->getNumElements();
+    unsigned numDstElts = cast<cir::VectorType>(vec.getType()).getSize();
+    if (numDstElts == numSrcElts) {
+      // Use a shuffle vector when the src and destination have the same
+      // number of elements, and restore the vector mask since it is on the
+      // side where it will be stored.
+      SmallVector<int64_t, 4> mask(numDstElts);
+      for (unsigned i = 0; i != numDstElts; ++i)
+        mask[getAccessedFieldNo(i, elts)] = i;
+
+      vec = builder.createVecShuffle(loc, srcVal, mask);
+    } else if (numDstElts > numSrcElts) {
+      // Extend the source vector to the same length and then shuffle it
+      // into the destination.
+      // FIXME: since we're shuffling with poison, can we just use the indices
+      // into that? This could be simpler.
+      SmallVector<int64_t, 4> extMask(numDstElts, -1);
+      std::iota(extMask.begin(), extMask.begin() + numSrcElts, 0);
+
+      mlir::Value extSrcVal = builder.createVecShuffle(loc, srcVal, extMask);
+
+      // Build the identity mask.
+      SmallVector<int64_t, 4> mask(numDstElts);
+      std::iota(mask.begin(), mask.begin() + numDstElts, 0);
+
+      // When the vector size is odd and .odd or .hi is used, the last element
+      // of the Elts constant array will be one past the size of the vector.
+      // Ignore the last element here, if it is greater than the mask size.
+      if ((unsigned)getAccessedFieldNo(numSrcElts - 1, elts) == mask.size())
+        numSrcElts--;
+
+      // Modify the entries that get shuffled in from the source.
+      for (unsigned i = 0; i != numSrcElts; ++i)
+        mask[getAccessedFieldNo(i, elts)] = i + numDstElts;
+
+      vec = builder.createVecShuffle(loc, vec, extSrcVal, mask);
+    } else {
+      // We should never shorten the vector.
+      llvm_unreachable("unexpected shorten vector length");
+    }
+  } else {
+    // If the src is a scalar (not a vector), and the target is a vector, it
+    // must be updating one element.
+    unsigned inIdx = getAccessedFieldNo(0, elts);
+    cir::ConstantOp elt = builder.getSInt64(inIdx, loc);
+    vec = cir::VecInsertOp::create(builder, loc, vec, srcVal, elt);
+  }
+
+  builder.createStore(loc, vec, dst.getExtVectorAddress(),
+                      dst.isVolatileQualified());
+}
+
 void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
                                             bool isInit) {
   if (!dst.isSimple()) {
@@ -317,6 +403,9 @@ void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
     return;
   }
 
+  if (dst.isExtVectorElt())
+    return emitStoreThroughExtVectorComponentLValue(src, dst);
+
   assert(dst.isBitField() && "Unknown LValue type");
   emitStoreThroughBitfieldLValue(src, dst);
   return;
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index aadd2ff74165..3905c154e472 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -2070,9 +2070,11 @@ public:
                          bool isInit = false, bool isNontemporal = false);
   void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit);
 
+  void emitStoreThroughExtVectorComponentLValue(RValue src, LValue dst);
+
   /// Store the specified rvalue into the specified
-  /// lvalue, where both are guaranteed to the have the same type, and that type
-  /// is 'Ty'.
+  /// lvalue, where both are guaranteed to have the same type, and that
+  /// type is 'Ty'.
   void emitStoreThroughLValue(RValue src, LValue dst, bool isInit = false);
 
   mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
diff --git a/clang/test/CIR/CodeGen/vector-ext-element.cpp b/clang/test/CIR/CodeGen/vector-ext-element.cpp
index 26c94e03a625..1d071a583f5d 100644
--- a/clang/test/CIR/CodeGen/vector-ext-element.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext-element.cpp
@@ -339,3 +339,66 @@ void array_subscript_expr_with_element_expr_base() {
 // OGCG: %[[VEC_MEMBER_EXPR:.*]] = getelementptr inbounds i32, ptr %[[A_ADDR]], i64 0
 // OGCG: %[[VEC_ELEM_PTR:.*]] = getelementptr inbounds i32, ptr %[[VEC_MEMBER_EXPR]], i64 1
 // OGCG: store i32 2, ptr %[[VEC_ELEM_PTR]], align 4
+
+void store_src_dest_same_size() {
+  vi4 a;
+  vi2 b;
+  b.xy = a.xy;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
+// CIR: %[[SHUFFLE_A:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
+// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[SHUFFLE_A]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
+// CIR: cir.store {{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
+// LLVM: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+// LLVM: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
+// OGCG: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
+// OGCG: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
+
+void store_src_dest_not_same_size() {
+  vi4 a;
+  vi2 b;
+  a.lo = b;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
+// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
+// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
+// CIR: %[[SHUFFLE_B:.*]] = cir.vec.shuffle(%[[TMP_B]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<4 x !s32i>
+// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[SHUFFLE_B]] : !cir.vector<4 x !s32i>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s32i>
+// CIR: cir.store {{.*}} %[[RESULT]], %[[A_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
+// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+// LLVM: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+// LLVM: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
+// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+// OGCG: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+// OGCG: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
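
For reference, here is a minimal standalone example (illustrative only, not part of the patch) that exercises the same store paths as the new tests. The vi4/vi2 typedefs mirror the ones used in vector-ext-element.cpp, and the final scalar store is an assumed example of the cir.vec.insert path, which the new code handles but the added tests do not cover:

typedef int vi4 __attribute__((ext_vector_type(4)));
typedef int vi2 __attribute__((ext_vector_type(2)));

void example() {
  vi4 a;
  vi2 b;

  // Same element count on both sides: the stored rvalue is shuffled so each
  // source element lands in its accessed field, then the whole two-element
  // value is written back (read/modify/write of 'b').
  b.xy = a.xy;

  // Destination vector wider than the source: 'b' is first widened with
  // poison lanes (-1 mask entries), then blended into the loaded value of
  // 'a' by a second shuffle ([4, 5, 2, 3] in the test above) before the store.
  a.lo = b;

  // Scalar stored into a single accessed element: lowered with cir.vec.insert
  // rather than a shuffle (assumed path; not covered by the new tests).
  a.x = 1;
}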