[CIR] Implement emitStoreThroughLValue for ExtVectorType (#194127)
Implement emitStoreThroughLValue for ExtVectorType. Issue: https://github.com/llvm/llvm-project/issues/192311
This commit is contained in:
@@ -303,6 +303,92 @@ Address CIRGenFunction::emitPointerWithAlignment(const Expr *expr,
|
||||
/*forPointeeType=*/true, baseInfo);
|
||||
}
|
||||
|
||||
/// Store the rvalue \p src through the ext-vector-element lvalue \p dst
/// (e.g. `v.xy = ...`). This becomes a read/modify/write of the underlying
/// vector: load the whole vector, blend the new elements in with shuffles
/// (or a single vec.insert when the source is a scalar), and store it back.
void CIRGenFunction::emitStoreThroughExtVectorComponentLValue(RValue src,
                                                              LValue dst) {
  // Bit width of the scalar element of `ty`, looking through vector types.
  auto getScalarSizeInBits = [&](mlir::Type ty) -> unsigned {
    mlir::Type scalarTy = ty;
    if (auto vecTy = mlir::dyn_cast<cir::VectorType>(ty))
      scalarTy = vecTy.getElementType();
    cir::CIRDataLayout dl = cgm.getDataLayout();
    return dl.getTypeSizeInBits(scalarTy).getFixedValue();
  };

  mlir::Value srcVal = src.getValue();
  Address dstAddr = dst.getExtVectorAddress();
  if (getScalarSizeInBits(dstAddr.getElementType()) >
      getScalarSizeInBits(srcVal.getType())) {
    cgm.errorNYI(
        dst.getPointer().getLoc(),
        "emitStoreThroughExtVectorComponentLValue: dstTySize > srcTySize");
    return;
  }

  if (getLangOpts().HLSL) {
    cgm.errorNYI(dst.getPointer().getLoc(),
                 "emitStoreThroughExtVectorComponentLValue: HLSL");
    return;
  }

  // This access turns into a read/modify/write of the vector. Load the input
  // value now.
  mlir::Location loc = dst.getExtVectorPointer().getLoc();

  mlir::ArrayAttr elts = dst.getExtVectorElts();

  mlir::Value vec = builder.createLoad(loc, dstAddr, dst.isVolatile());
  if (const auto *vecTy = dst.getType()->getAs<clang::VectorType>()) {
    unsigned numSrcElts = vecTy->getNumElements();
    unsigned numDstElts = cast<cir::VectorType>(vec.getType()).getSize();
    if (numDstElts == numSrcElts) {
      // Use a shuffle when the src and destination have the same number of
      // elements, applying the inverse of the access mask so each source
      // element lands in the destination slot it is being stored to.
      SmallVector<int64_t> mask(numDstElts);
      for (unsigned i = 0; i != numDstElts; ++i)
        mask[getAccessedFieldNo(i, elts)] = i;

      vec = builder.createVecShuffle(loc, srcVal, mask);
    } else if (numDstElts > numSrcElts) {
      // Extend the source vector to the same length and then shuffle it
      // into the destination.
      // FIXME: since we're shuffling with undef, can we just use the indices
      // into that? This could be simpler.
      SmallVector<int64_t> extMask(numDstElts, -1);
      std::iota(extMask.begin(), extMask.begin() + numSrcElts, 0);

      mlir::Value extSrcVal = builder.createVecShuffle(loc, srcVal, extMask);

      // Build an identity mask over the destination vector.
      SmallVector<int64_t> mask(numDstElts);
      std::iota(mask.begin(), mask.end(), 0);

      // When the vector size is odd and .odd or .hi is used, the last element
      // of the Elts constant array will be one past the size of the vector.
      // Ignore the last element here, if it is greater than the mask size.
      if (static_cast<unsigned>(getAccessedFieldNo(numSrcElts - 1, elts)) ==
          mask.size())
        numSrcElts--;

      // Redirect the accessed slots to pull from the extended source vector.
      for (unsigned i = 0; i != numSrcElts; ++i)
        mask[getAccessedFieldNo(i, elts)] = i + numDstElts;

      vec = builder.createVecShuffle(loc, vec, extSrcVal, mask);
    } else {
      // We should never shorten the vector.
      llvm_unreachable("unexpected shorten vector length");
    }
  } else {
    // If the src is a scalar (not a vector), and the target is a vector, it
    // must be updating exactly one element.
    unsigned inIdx = getAccessedFieldNo(0, elts);
    cir::ConstantOp elt = builder.getSInt64(inIdx, loc);
    vec = cir::VecInsertOp::create(builder, loc, vec, srcVal, elt);
  }

  builder.createStore(loc, vec, dst.getExtVectorAddress(),
                      dst.isVolatileQualified());
}
|
||||
|
||||
void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
|
||||
bool isInit) {
|
||||
if (!dst.isSimple()) {
|
||||
@@ -317,6 +403,9 @@ void CIRGenFunction::emitStoreThroughLValue(RValue src, LValue dst,
|
||||
return;
|
||||
}
|
||||
|
||||
if (dst.isExtVectorElt())
|
||||
return emitStoreThroughExtVectorComponentLValue(src, dst);
|
||||
|
||||
assert(dst.isBitField() && "Unknown LValue type");
|
||||
emitStoreThroughBitfieldLValue(src, dst);
|
||||
return;
|
||||
|
||||
@@ -2070,9 +2070,11 @@ public:
|
||||
bool isInit = false, bool isNontemporal = false);
|
||||
void emitStoreOfScalar(mlir::Value value, LValue lvalue, bool isInit);
|
||||
|
||||
void emitStoreThroughExtVectorComponentLValue(RValue src, LValue dst);
|
||||
|
||||
/// Store the specified rvalue into the specified
|
||||
/// lvalue, where both are guaranteed to have the same type, and that type
|
||||
/// is 'Ty'.
|
||||
/// lvalue, where both are guaranteed to have the same type, and that
|
||||
/// type is 'Ty'.
|
||||
void emitStoreThroughLValue(RValue src, LValue dst, bool isInit = false);
|
||||
|
||||
mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
|
||||
|
||||
@@ -339,3 +339,66 @@ void array_subscript_expr_with_element_expr_base() {
|
||||
// OGCG: %[[VEC_MEMBER_EXPR:.*]] = getelementptr inbounds i32, ptr %[[A_ADDR]], i64 0
|
||||
// OGCG: %[[VEC_ELEM_PTR:.*]] = getelementptr inbounds i32, ptr %[[VEC_MEMBER_EXPR]], i64 1
|
||||
// OGCG: store i32 2, ptr %[[VEC_ELEM_PTR]], align 4
|
||||
|
||||
// Store a two-element swizzle of a 4-wide vector into a two-element swizzle
// of a 2-wide vector: the source swizzle and the destination vector have the
// same element count, so the store is emitted as shuffles (see CIR/LLVM
// checks below) rather than per-element inserts.
void store_src_dest_same_size() {
  vi4 a;
  vi2 b;
  b.xy = a.xy;
}
|
||||
|
||||
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
|
||||
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
|
||||
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
|
||||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<4 x !s32i>
|
||||
// CIR: %[[SHUFFLE_A:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[POISON]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
|
||||
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
|
||||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
|
||||
// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[SHUFFLE_A]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !s32i>
|
||||
// CIR: cir.store {{.*}} %[[RESULT]], %[[B_ADDR]] : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>
|
||||
|
||||
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
|
||||
// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
|
||||
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// LLVM: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// LLVM: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
|
||||
|
||||
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
|
||||
// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
|
||||
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// OGCG: %[[RESULT:.*]] = shufflevector <2 x i32> %[[SHUFFLE_A]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
// OGCG: store <2 x i32> %[[RESULT]], ptr %[[B_ADDR]], align 8
|
||||
|
||||
// Store a whole 2-wide vector into the low half (.lo) of a 4-wide vector:
// the source is narrower than the destination, so per the checks below it is
// first widened with a poison-padded shuffle and then blended into the
// destination vector.
void store_src_dest_not_same_size() {
  vi4 a;
  vi2 b;
  a.lo = b;
}
|
||||
|
||||
// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
|
||||
// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>>, ["b"]
|
||||
// CIR: %[[TMP_B:.*]] = cir.load {{.*}} %[[B_ADDR]] : !cir.ptr<!cir.vector<2 x !s32i>>, !cir.vector<2 x !s32i>
|
||||
// CIR: %[[TMP_A:.*]] = cir.load {{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
|
||||
// CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<2 x !s32i>
|
||||
// CIR: %[[SHUFFLE_B:.*]] = cir.vec.shuffle(%[[TMP_B]], %[[POISON]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<-1> : !s32i, #cir.int<-1> : !s32i] : !cir.vector<4 x !s32i>
|
||||
// CIR: %[[RESULT:.*]] = cir.vec.shuffle(%[[TMP_A]], %[[SHUFFLE_B]] : !cir.vector<4 x !s32i>) [#cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s32i>
|
||||
// CIR: cir.store {{.*}} %[[RESULT]], %[[A_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
|
||||
|
||||
// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
|
||||
// LLVM: %[[B_ADDR:.*]] = alloca <2 x i32>, i64 1, align 8
|
||||
// LLVM: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// LLVM: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
// LLVM: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
// LLVM: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
|
||||
|
||||
// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
|
||||
// OGCG: %[[B_ADDR:.*]] = alloca <2 x i32>, align 8
|
||||
// OGCG: %[[TMP_B:.*]] = load <2 x i32>, ptr %[[B_ADDR]], align 8
|
||||
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
|
||||
// OGCG: %[[SHUFFLE_A:.*]] = shufflevector <2 x i32> %[[TMP_B]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
// OGCG: %[[RESULT:.*]] = shufflevector <4 x i32> %[[TMP_A]], <4 x i32> %[[SHUFFLE_A]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
// OGCG: store <4 x i32> %[[RESULT]], ptr %[[A_ADDR]], align 16
|
||||
|
||||
Reference in New Issue
Block a user