[VPlan] Directly unroll VectorPointerRecipe (#168886)

In an effort to get rid of VPUnrollPartAccessor and directly unroll
recipes, start by directly unrolling VectorPointerRecipe. This allows
VPlan-based simplifications and simplifies the corresponding execute
implementation.
Author: Ramkumar Ramachandra
Date: 2025-12-15 10:54:06 +00:00
Committed by: GitHub
Parent: b3ec8be22b
Commit: 0636225b93
51 changed files with 901 additions and 1169 deletions
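For orientation, here is a minimal standalone sketch of the shape of the change (illustrative names only, not the actual VPlan API): the per-part offset that execute() used to derive from a stored unroll part is now a first-class operand computed once during unrolling.

```cpp
#include <cstdint>

// Before: the recipe carried its unroll part and execute() rebuilt the step,
// roughly createStepForVF(IndexTy, VF, Part), i.e. RuntimeVF * Part.
uint64_t stepPerPartBefore(uint64_t RuntimeVF, unsigned Part) {
  return RuntimeVF * Part; // recomputed inside every part's execute()
}

// After: unrolling computes RuntimeVF * Part once, attaches it as an extra
// operand, and execute() reduces to a single GEP:
//   ResultPtr = getelementptr SourceElementTy, Ptr, Offset
struct PointerRecipeModel {
  uint64_t Base;     // lane-0 pointer operand, as a byte address
  uint64_t Offset;   // VF * Part, in elements (no operand for part 0)
  uint64_t ElemSize; // size of SourceElementTy in bytes
  uint64_t execute() const { return Base + Offset * ElemSize; }
};
```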


@@ -1966,20 +1966,24 @@ protected:
#endif
};
/// A recipe to compute the pointers for widened memory accesses of IndexTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
/// A recipe to compute the pointers for widened memory accesses of \p
/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
Type *SourceElementTy;
public:
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
GEPNoWrapFlags GEPFlags, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlags, DL),
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
SourceElementTy(SourceElementTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
VPValue *getOffset() {
return getNumOperands() == 2 ? getOperand(1) : nullptr;
}
void execute(VPTransformState &State) override;
Type *getSourceElementType() const { return SourceElementTy; }
@@ -1999,14 +2003,13 @@ public:
}
VPVectorPointerRecipe *clone() override {
return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
getGEPNoWrapFlags(), getDebugLoc());
auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
getGEPNoWrapFlags(), getDebugLoc());
if (auto *Off = getOffset())
Clone->addOperand(Off);
return Clone;
}
/// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
/// this is only accurate after the VPlan has been unrolled.
bool isFirstPart() const { return getUnrollPart(*this) == 0; }
/// Return the cost of this VPHeaderPHIRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {


@@ -2526,15 +2526,12 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
void VPVectorPointerRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
unsigned CurrentPart = getUnrollPart(*this);
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));
assert(getOffset() &&
"Expected prior simplification of recipe without offset");
Value *Ptr = State.get(getOperand(0), VPLane(0));
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Increment,
"", getGEPNoWrapFlags());
Value *Offset = State.get(getOffset(), true);
Value *ResultPtr = Builder.CreateGEP(getSourceElementType(), Ptr, Offset, "",
getGEPNoWrapFlags());
State.set(this, ResultPtr, /*IsScalar*/ true);
}
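The new assert encodes an ordering contract: by the time execute() runs, any recipe still lacking an offset operand (the part-0 case) is expected to have been folded to its pointer operand by the VPlan simplification in the next hunk. As a worked instance, for a <vscale x 2 x i64> access at part 1 the offset operand holds vscale * 2, and execute() emits the single `getelementptr i64, ptr %base, i64 %off` visible in the TFA_INTERLEAVE test checks further down.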


@@ -1515,13 +1515,11 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
}
}
// VPVectorPointer for part 0 can be replaced by their start pointer.
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(Def)) {
if (VecPtr->isFirstPart()) {
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
return;
}
}
// Simplify unrolled VectorPointer without offset, or with zero offset, to
// just the pointer operand.
if (auto *VPR = dyn_cast<VPVectorPointerRecipe>(Def))
if (!VPR->getOffset() || match(VPR->getOffset(), m_ZeroInt()))
return VPR->replaceAllUsesWith(VPR->getOperand(0));
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
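Note that after unrolling only the copies for parts greater than 0 receive the extra operand (per the updated doc comment), so the !getOffset() test presumably subsumes the old isFirstPart() check, while the m_ZeroInt() arm additionally catches offsets that have been constant-folded to zero.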


@@ -297,6 +297,22 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
Copy->setOperand(1, getValueForPart(Op, Part));
continue;
}
if (auto *VPR = dyn_cast<VPVectorPointerRecipe>(&R)) {
VPBuilder Builder(VPR);
const DataLayout &DL =
Plan.getScalarHeader()->getIRBasicBlock()->getDataLayout();
Type *IndexTy = DL.getIndexType(TypeInfo.inferScalarType(VPR));
Type *VFTy = TypeInfo.inferScalarType(&Plan.getVF());
VPValue *VF = Builder.createScalarZExtOrTrunc(
&Plan.getVF(), IndexTy, VFTy, DebugLoc::getUnknown());
// VFxUF does not wrap, so VF * Part also cannot wrap.
VPValue *VFxPart = Builder.createOverflowingOp(
Instruction::Mul, {VF, Plan.getConstantInt(IndexTy, Part)},
{true, true});
Copy->setOperand(0, VPR->getOperand(0));
Copy->addOperand(VFxPart);
continue;
}
if (auto *Red = dyn_cast<VPReductionRecipe>(&R)) {
auto *Phi = dyn_cast<VPReductionPHIRecipe>(R.getOperand(0));
if (Phi && Phi->isOrdered()) {
@@ -314,12 +330,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
if (isa<VPVectorEndPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
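A standalone sketch of the arithmetic the unroller now emits once per non-zero part (assumed helper semantics, not the VPBuilder API): the runtime VF is zext/trunc'd to the pointer's index type and multiplied by the part number, and because VF * UF is known not to wrap, the multiply may carry nuw and nsw.

```cpp
#include <cassert>
#include <cstdint>

// Models the offset operand added for unrolled part >= 1. IndexBits stands
// in for DL.getIndexType(...); VF for the element count of Plan.getVF().
uint64_t vfTimesPart(uint64_t VF, unsigned Part, unsigned IndexBits) {
  assert(Part >= 1 && "part 0 gets no offset operand");
  assert(IndexBits > 0 && IndexBits <= 64 && "index width out of range");
  uint64_t Mask =
      IndexBits == 64 ? ~uint64_t(0) : (uint64_t(1) << IndexBits) - 1;
  uint64_t VFIdx = VF & Mask; // createScalarZExtOrTrunc to IndexTy
  // The multiply carries nuw/nsw in the emitted IR since VFxUF cannot wrap.
  return (VFIdx * Part) & Mask;
}
```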


@@ -528,7 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8
@@ -544,14 +545,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[BROADCAST_SPLAT]] to <vscale x 4 x double>
; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2
; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]]
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]]
; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 12
; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP17]]
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[NEXT_GEP1]], align 8
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP12]], align 8
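The test change above shows the effect for VF = <vscale x 4 x ...> with UF = 4: the preheader now computes vscale * 4 once ([[TMP11]]) and derives the step as (vscale * 4) * 4 = vscale * 16, while in the loop the three non-zero part offsets become [[TMP11]], 2 * [[TMP11]] ([[TMP14]]) and 3 * [[TMP11]] ([[TMP17]]), replacing three separate @llvm.vscale.i64() calls plus shifts/multiplies.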


@@ -21,7 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
@@ -36,9 +37,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]]
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP11]]
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]


@@ -30,7 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 16
; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0
@@ -40,9 +41,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP14]]
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP13]]
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 1
; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP15]], align 1
; DEFAULT-NEXT: [[TMP16:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
@@ -56,9 +55,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i16> [[TMP22]] to <vscale x 8 x i8>
; DEFAULT-NEXT: [[TMP25:%.*]] = trunc <vscale x 8 x i16> [[TMP23]] to <vscale x 8 x i8>
; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 3
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP28]]
; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP26]], i64 [[TMP13]]
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP24]], ptr [[TMP26]], align 1
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP25]], ptr [[TMP29]], align 1
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]


@@ -144,7 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE-4-VLA: vector.ph:
; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -155,14 +156,10 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-4-VLA-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP6]]
; INTERLEAVE-4-VLA-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 3
; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3
; INTERLEAVE-4-VLA-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]]
; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]]
; INTERLEAVE-4-VLA-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 12
; INTERLEAVE-4-VLA-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP12]]
; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 1
; INTERLEAVE-4-VLA-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 1


@@ -37,7 +37,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR3:[0-9]+]]
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR4:[0-9]+]]
; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -72,7 +72,8 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -82,17 +83,13 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison)
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]]
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP11]], ptr align 8 [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP12]], ptr align 8 [[TMP16]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -160,7 +157,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP12]], 50
; TFNONE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END]]
; TFNONE: [[IF_THEN]]:
; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR3]]
; TFNONE-NEXT: [[TMP9:%.*]] = call i64 @foo(i64 [[TMP12]]) #[[ATTR4]]
; TFNONE-NEXT: br label %[[IF_END]]
; TFNONE: [[IF_END]]:
; TFNONE-NEXT: [[TMP14:%.*]] = phi i64 [ [[TMP9]], %[[IF_THEN]] ], [ 0, %[[FOR_BODY]] ]
@@ -201,7 +198,8 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -211,8 +209,6 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison)
@@ -225,9 +221,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[TMP15]], <vscale x 2 x i64> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> [[TMP16]], <vscale x 2 x i64> zeroinitializer
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]]
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr align 8 [[TMP17]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr align 8 [[TMP20]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -308,10 +302,10 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 50
; TFNONE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
; TFNONE: [[IF_THEN]]:
; TFNONE-NEXT: [[TMP14:%.*]] = call i64 @foo(i64 [[TMP13]]) #[[ATTR4:[0-9]+]]
; TFNONE-NEXT: [[TMP14:%.*]] = call i64 @foo(i64 [[TMP13]]) #[[ATTR5:[0-9]+]]
; TFNONE-NEXT: br label %[[IF_END]]
; TFNONE: [[IF_ELSE]]:
; TFNONE-NEXT: [[TMP15:%.*]] = call i64 @foo(i64 0) #[[ATTR4]]
; TFNONE-NEXT: [[TMP15:%.*]] = call i64 @foo(i64 0) #[[ATTR5]]
; TFNONE-NEXT: br label %[[IF_END]]
; TFNONE: [[IF_END]]:
; TFNONE-NEXT: [[TMP16:%.*]] = phi i64 [ [[TMP14]], %[[IF_THEN]] ], [ [[TMP15]], %[[IF_ELSE]] ]
@@ -355,7 +349,8 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -365,8 +360,6 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison)
@@ -385,9 +378,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[TMP21]], <vscale x 2 x i64> [[TMP17]]
; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select <vscale x 2 x i1> [[TMP12]], <vscale x 2 x i64> [[TMP22]], <vscale x 2 x i64> [[TMP18]]
; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP25]]
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP23]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr align 8 [[TMP23]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI4]], ptr align 8 [[TMP26]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -464,7 +455,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]]
; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -481,7 +472,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; TFALWAYS-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; TFALWAYS-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
; TFALWAYS-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]]
; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -512,7 +503,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[VECTOR_BODY]] ]
; TFFALLBACK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; TFFALLBACK-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
; TFFALLBACK-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]]
; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -529,7 +520,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; TFA_INTERLEAVE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; TFA_INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR5:[0-9]+]]
; TFA_INTERLEAVE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]]
; TFA_INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFA_INTERLEAVE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFA_INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -588,7 +579,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFNONE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[N_VEC]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; TFNONE-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: [[LOAD:%.*]] = load i64, ptr [[GEP]], align 8
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR6:[0-9]+]]
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[LOAD]]) #[[ATTR7:[0-9]+]]
; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -645,7 +636,8 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -655,17 +647,13 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ splat (i1 true), %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[TMP7]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x i64> poison)
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[WIDE_MASKED_LOAD3]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP15]]
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP11]], ptr align 8 [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP12]], ptr align 8 [[TMP16]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
@@ -739,7 +727,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFNONE-NEXT: [[LOAD:%.*]] = load double, ptr [[GEP]], align 8
; TFNONE-NEXT: [[MULADD]] = tail call double @llvm.fmuladd.f64(double [[LOAD]], double [[M]], double [[FMA_SUM]])
; TFNONE-NEXT: [[TOINT:%.*]] = fptoui double [[LOAD]] to i64
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[TOINT]]) #[[ATTR3]]
; TFNONE-NEXT: [[CALL:%.*]] = call i64 @foo(i64 [[TOINT]]) #[[ATTR4]]
; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 8
; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -811,7 +799,8 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFA_INTERLEAVE-SAME: ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], double [[M:%.*]]) #[[ATTR0]] {
; TFA_INTERLEAVE-NEXT: [[ENTRY:.*]]:
; TFA_INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP3]], 2
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP9]], 2
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 1025)
@@ -824,8 +813,6 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 1
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; TFA_INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
@@ -836,9 +823,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call <vscale x 2 x i64> @foo_vector(<vscale x 2 x i64> [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]]
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP9]]
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP15]], ptr align 8 [[TMP17]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP20]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> [[TMP11]], <vscale x 2 x double> splat (double -0.000000e+00)
@@ -912,7 +897,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFNONE: [[LOOP]]:
; TFNONE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_END:.*]] ]
; TFNONE-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8
; TFNONE-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]]
; TFNONE-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR8:[0-9]+]]
; TFNONE-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00
; TFNONE-NEXT: br i1 [[COND1]], label %[[LOOP_MIDDLE:.*]], label %[[LOOP_END]]
; TFNONE: [[LOOP_MIDDLE]]:
@@ -933,7 +918,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFCOMMON: [[LOOP]]:
; TFCOMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2]], align 8
; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR6:[0-9]+]]
; TFCOMMON-NEXT: [[EXP:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR7:[0-9]+]]
; TFCOMMON-NEXT: [[COND1:%.*]] = fcmp ogt double [[EXP]], 0.000000e+00
; TFCOMMON-NEXT: [[SINK:%.*]] = select i1 [[COND1]], double 0.000000e+00, double 1.000000e+00
; TFCOMMON-NEXT: store double [[SINK]], ptr [[P]], align 8
@@ -958,7 +943,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP9]] ]
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP9]] ]
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR6:[0-9]+]]
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = fcmp ogt double [[TMP6]], 0.000000e+00
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP8]], double 0.000000e+00, double 1.000000e+00
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = or i1 [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK2]]


@@ -12,7 +12,8 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -21,15 +22,11 @@ define i32 @sudot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP17]], align 1
; CHECK-NEXT: [[TMP18:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD3]] to <vscale x 16 x i32>
@@ -115,7 +112,8 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -124,15 +122,11 @@ define i32 @usdot(ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP6]], i64 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 [[TMP1]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP17]], align 1
; CHECK-NEXT: [[TMP18:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD3]] to <vscale x 16 x i32>


@@ -43,7 +43,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -52,15 +53,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX1]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP20]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP20]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP5]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX1]]
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP28]], i64 [[TMP8]]
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP28]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP9]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD3]] to <vscale x 16 x i32>
@@ -153,7 +150,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v16i64(<2 x i64> [[VEC_PHI]], <16 x i64> [[TMP2]])
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[PARTIAL_REDUCE]])
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
@@ -206,7 +203,7 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE14]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v16i64(<2 x i64> [[VEC_PHI3]], <16 x i64> [[TMP17]])
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[PARTIAL_REDUCE6]], [[PARTIAL_REDUCE]]
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX15:%.*]] = add <2 x i64> [[PARTIAL_REDUCE13]], [[BIN_RDX]]
@@ -288,7 +285,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> [[VEC_PHI]], <8 x i64> [[TMP2]])
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[PARTIAL_REDUCE]])
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
@@ -343,7 +340,7 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE15]] = call <2 x i64> @llvm.vector.partial.reduce.add.v2i64.v8i64(<2 x i64> [[VEC_PHI3]], <8 x i64> [[TMP17]])
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[PARTIAL_REDUCE7]], [[PARTIAL_REDUCE]]
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX16:%.*]] = add <2 x i64> [[PARTIAL_REDUCE14]], [[BIN_RDX]]
@@ -487,7 +484,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP69]] = add <16 x i32> [[TMP68]], [[VEC_PHI]]
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVE1-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP71:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]])
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
@@ -646,7 +643,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP138]] = add <16 x i32> [[TMP136]], [[VEC_PHI1]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP141:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP141]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP141]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP138]], [[TMP137]]
; CHECK-INTERLEAVED-NEXT: [[TMP142:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]])
@@ -785,7 +782,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = add <16 x i32> [[TMP4]], [[TMP5]]
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
; CHECK-INTERLEAVE1-NEXT: br label [[FOR_EXIT:%.*]]
@@ -818,7 +815,7 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = add <16 x i32> [[TMP9]], [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP11]], i32 15
; CHECK-INTERLEAVED-NEXT: br label [[FOR_EXIT:%.*]]
@@ -897,7 +894,7 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = add <vscale x 8 x i32> [[TMP16]], [[TMP15]]
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = mul nuw i32 [[TMP23]], 8
@@ -913,29 +910,26 @@ define i32 @not_dotp_not_phi(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-INTERLEAVED: vector.body:
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP14]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP19]]
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP16]], i64 [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x i8>, ptr [[TMP20]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD1]] to <vscale x 8 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = mul <vscale x 8 x i32> [[TMP22]], [[TMP15]]
; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = add <vscale x 8 x i32> [[TMP30]], [[TMP22]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8
@@ -1075,7 +1069,8 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP13]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP16]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[NUM_IN]], [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[NUM_IN]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1100,14 +1095,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP1]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 16 x i8>, ptr [[TMP17]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 [[TMP19]]
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 16 x i8>, ptr [[TMP2]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 16 x i8>, ptr [[TMP20]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD9]] to <vscale x 16 x i32>
@@ -1118,14 +1109,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD8]] to <vscale x 16 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = mul nsw <vscale x 16 x i32> [[TMP24]], [[TMP25]]
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI7]], <vscale x 16 x i32> [[TMP26]])
; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP28]]
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 16 x i8>, ptr [[TMP4]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = shl nuw i64 [[TMP30]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 [[TMP31]]
; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 16 x i8>, ptr [[TMP5]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 16 x i8>, ptr [[TMP63]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD12]] to <vscale x 16 x i32>
@@ -1136,14 +1123,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD15]] to <vscale x 16 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = mul nsw <vscale x 16 x i32> [[TMP36]], [[TMP37]]
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI5]], <vscale x 16 x i32> [[TMP38]])
; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = shl nuw i64 [[TMP39]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP40]]
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <vscale x 16 x i8>, ptr [[TMP41]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = shl nuw i64 [[TMP42]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP43]]
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <vscale x 16 x i8>, ptr [[TMP44]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD18]] to <vscale x 16 x i32>
@@ -1154,14 +1137,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD21]] to <vscale x 16 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = mul nsw <vscale x 16 x i32> [[TMP48]], [[TMP49]]
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE23]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI3]], <vscale x 16 x i32> [[TMP50]])
; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP52:%.*]] = shl nuw i64 [[TMP51]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP52]]
; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <vscale x 16 x i8>, ptr [[TMP53]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP54:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = shl nuw i64 [[TMP54]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 [[TMP55]]
; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 [[TMP16]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD26:%.*]] = load <vscale x 16 x i8>, ptr [[TMP11]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD27:%.*]] = load <vscale x 16 x i8>, ptr [[TMP56]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = sext <vscale x 16 x i8> [[WIDE_LOAD24]] to <vscale x 16 x i32>
@@ -1487,7 +1466,8 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1496,17 +1476,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP24]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP3]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP5]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP11]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD3]] to <vscale x 8 x i32>
@@ -1630,7 +1606,8 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 41, [[TMP3]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 41, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -1639,16 +1616,12 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 [[TMP6]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add nuw nsw i64 [[INDEX]], 1
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]]
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[TMP11]]
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[TMP6]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP15]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP12]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD3]] to <vscale x 16 x i64>
@@ -1814,7 +1787,7 @@ define void @not_dotp_not_phi2(ptr %matrix, i32 %n) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP23]] = add i32 [[TMP21]], [[TMP15]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP23]], [[TMP22]]
; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
@@ -1926,7 +1899,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]]
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -1964,7 +1937,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]]
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]])
@@ -2055,7 +2028,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <8 x i64> [[TMP4]], [[VEC_PHI]]
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -2093,7 +2066,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 {
; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <8 x i64> [[TMP7]], [[VEC_PHI1]]
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <8 x i64> [[TMP9]], [[TMP8]]
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[BIN_RDX]])
@@ -2213,7 +2186,8 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP3]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP10]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = trunc i64 [[N_VEC]] to i32
@@ -2227,14 +2201,10 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE6:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP23]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[NEXT_GEP]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP11]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 [[TMP13]]
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[NEXT_GEP2]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP14]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD4]] to <vscale x 16 x i64>
@@ -2411,7 +2381,7 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP34]])
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-INTERLEAVE1: middle.block:
; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE21]])
; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE20]])
@@ -2511,7 +2481,7 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE21]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP34]])
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK-INTERLEAVED: middle.block:
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE21]])
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE20]])


@@ -44,7 +44,8 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -53,15 +54,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP5]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP20]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP8]]
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP14]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP9]], align 1
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD3]] to <vscale x 16 x i32>


@@ -41,7 +41,8 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP1]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -50,8 +51,6 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP0]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP5]], align 1
@@ -235,7 +234,8 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -244,8 +244,6 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP1]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP5]], align 1
@@ -341,7 +339,8 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -350,8 +349,6 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP1]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP1]], align 2
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i16>, ptr [[TMP5]], align 2
@@ -737,7 +734,8 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 8
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -748,14 +746,10 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = mul nuw nsw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw nsw i64 [[TMP4]], 3
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP7]]
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 24
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP10]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP1]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x i8>, ptr [[TMP5]], align 1
@@ -859,7 +853,8 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP2]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -868,8 +863,6 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP1]], align 1
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP5]], align 1
@@ -979,7 +972,8 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP13]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP13]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP16]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], [[TMP4]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -993,8 +987,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]]
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]]
; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP16]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext i32 [[TMP16]] to i64
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 [[TMP9]]
; CHECK-INTERLEAVED-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP3]], align 1
; CHECK-INTERLEAVED-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP10]], align 1
@@ -1110,7 +1103,8 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP11]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-INTERLEAVED: vector.ph:
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP3]], 32
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP3]], 16
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP8]], 2
; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], [[TMP12]]
; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[D]], [[N_VEC]]
@@ -1124,8 +1118,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 {
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ]
; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]]
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]]
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP8]], 4
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP14]]
; CHECK-INTERLEAVED-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4]], align 1
; CHECK-INTERLEAVED-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP10]], align 1


@@ -294,7 +294,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4
; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP5]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0
@@ -305,9 +306,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 2
; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP13]], i64 [[TMP17]]
; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[TMP13]], i64 [[TMP5]]
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP13]], align 2
; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x i16>, ptr [[TMP18]], align 2
; DEFAULT-NEXT: [[TMP19:%.*]] = udiv <vscale x 4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -355,7 +354,8 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; VSCALEFORTUNING2: [[VECTOR_PH]]:
; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 4
; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP7]], 2
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[X]], i64 0
@@ -366,9 +366,7 @@ define i16 @reduce_udiv(ptr %src, i16 %x, i64 %N) #0 {
; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX]]
; VSCALEFORTUNING2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; VSCALEFORTUNING2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i64 [[TMP11]]
; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP8]], i64 [[TMP7]]
; VSCALEFORTUNING2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2
; VSCALEFORTUNING2-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x i16>, ptr [[TMP12]], align 2
; VSCALEFORTUNING2-NEXT: [[TMP13:%.*]] = udiv <vscale x 4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]


@@ -19,7 +19,8 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 64
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 510, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 510, [[N_MOD_VF]]
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add i64 3, [[N_VEC]]
@@ -28,29 +29,19 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 4
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP11]], 5
; CHECK-NEXT: [[TMP27:%.*]] = mul nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw nsw i64 [[TMP4]], 3
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP27]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[TMP15]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 4
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 5
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], 48
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP25]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP27]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i64 [[TMP15]]
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <vscale x 16 x i8>, ptr [[TMP29]], align 1
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 16 x i8>, ptr [[TMP20]], align 1
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 16 x i8>, ptr [[TMP23]], align 1


@@ -20,16 +20,15 @@ define void @cost_store_i8(ptr %dst) #0 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]]
; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP6]]
; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP5]]
; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP4]], align 1
; DEFAULT-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP7]], align 1
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -134,7 +133,8 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; DEFAULT: vector.ph:
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP2]], 16
; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP16]], 2
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i16> poison, i16 [[X]], i64 0
@@ -149,8 +149,6 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 {
; DEFAULT: vector.body:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 4
; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP16]]
; DEFAULT-NEXT: store <vscale x 16 x i8> [[TMP14]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; DEFAULT-NEXT: store <vscale x 16 x i8> [[TMP14]], ptr [[TMP17]], align 1, !alias.scope [[META9]], !noalias [[META6]]


@@ -79,8 +79,8 @@ define void @struct_return_replicate(ptr noalias %in, ptr noalias writeonly %out
; CHECK: [[ENTRY:.*:]]
; CHECK: [[VECTOR_PH:.*:]]
; CHECK: [[VECTOR_BODY:.*:]]
; CHECK: [[TMP2:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR2:[0-9]+]]
; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP3:%.*]]) #[[ATTR2]]
; CHECK: [[TMP3:%.*]] = tail call { half, half } @foo(half [[TMP1:%.*]]) #[[ATTR2:[0-9]+]]
; CHECK: [[TMP4:%.*]] = tail call { half, half } @foo(half [[TMP2:%.*]]) #[[ATTR2]]
; CHECK: [[MIDDLE_BLOCK:.*:]]
; CHECK: [[EXIT:.*:]]
;
@@ -143,16 +143,13 @@ exit:
define void @struct_return_scalable(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) #2 {
; CHECK-LABEL: define void @struct_return_scalable(
; CHECK-SAME: ptr noalias [[IN:%.*]], ptr noalias writeonly [[OUT_A:%.*]], ptr noalias writeonly [[OUT_B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK: [[VECTOR_PH:.*:]]
; CHECK: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK: [[VECTOR_PH1:.*:]]
; CHECK: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK: [[VECTOR_BODY:.*:]]
; CHECK: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK: [[VECTOR_BODY1:.*:]]
; CHECK: [[TMP12:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half> [[WIDE_LOAD:%.*]], <vscale x 8 x i1> splat (i1 true))
; CHECK: [[TMP13:%.*]] = call { <vscale x 8 x half>, <vscale x 8 x half> } @scalable_vec_masked_foo(<vscale x 8 x half> [[WIDE_LOAD1:%.*]], <vscale x 8 x i1> splat (i1 true))
; CHECK: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; CHECK: [[MIDDLE_BLOCK:.*:]]
; CHECK: [[SCALAR_PH:.*:]]
; CHECK: [[FOR_BODY:.*:]]


@@ -16,7 +16,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -25,9 +26,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, ptr [[TMP15]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[WIDE_LOAD]])
@@ -43,7 +42,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -59,7 +58,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: [[TMP27]] = and i64 [[VEC_PHI8]], [[TMP26]]
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[CMP_N6]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -75,7 +74,7 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
; CHECK-NEXT: [[AND]] = and i64 [[RDX]], [[L3]]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[AND_LCSSA]]


@@ -16,7 +16,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -25,9 +26,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ insertelement (<vscale x 2 x i64> zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 1
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, ptr [[TMP15]], align 8
; CHECK-NEXT: [[TMP16]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -42,7 +41,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -58,7 +57,7 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
; CHECK-NEXT: [[TMP26]] = add <2 x i64> [[WIDE_LOAD9]], [[VEC_PHI8]]
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP26]])
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]


@@ -16,7 +16,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -24,9 +25,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[WIDE_LOAD]])
@@ -39,7 +38,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
; CHECK: vec.epilog.ph:
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -54,7 +53,7 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-NEXT: [[TMP24]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI7]], <2 x float> [[WIDE_LOAD8]])
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]]
; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: vec.epilog.middle.block:
; CHECK-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]]
; CHECK-NEXT: br i1 [[CMP_N5]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]]


@@ -27,16 +27,15 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP4]], align 1
; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -60,16 +59,15 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP4]], align 1
; CHECK-VF8-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -119,16 +117,15 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP3]]
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP2]], align 1
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
@@ -151,16 +148,15 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP4]], align 1
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -230,16 +226,15 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP4]], align 1
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -282,16 +277,15 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-VF8-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP4]], align 1
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -351,16 +345,15 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP4]]
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -402,16 +395,15 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-VF8-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP5]]
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP4]]
; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
; CHECK-VF8-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -473,22 +465,19 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP5]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -535,22 +524,19 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP3]]
; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP3]]
; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -597,22 +583,19 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -636,22 +619,19 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
; CHECK-VF8-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-VF8: vector.ph:
; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP0]], 4
; CHECK-VF8-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-VF8: vector.body:
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP4]]
; CHECK-VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 [[TMP3]]
; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP2]], align 4
; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF8-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP8]]
; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP3]]
; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
; CHECK-VF8-NEXT: [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]


@@ -36,16 +36,15 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 16
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP5]]
; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP4]], align 1
; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -90,16 +89,15 @@ define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 {
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-EPILOG-PREFER-SCALABLE: vector.ph:
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP4]], 16
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP7]], 2
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-EPILOG-PREFER-SCALABLE: vector.body:
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[TMP8]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[TMP7]]
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP6]], align 1
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP9]], align 1
; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]


@@ -28,24 +28,21 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 16
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP6]], 8
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP8]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds half, ptr [[S]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds half, ptr [[TMP11]], i64 [[TMP8]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP11]], align 2
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x half>, ptr [[TMP15]], align 2
; CHECK-NEXT: [[TMP16:%.*]] = fneg <vscale x 8 x half> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP17:%.*]] = fneg <vscale x 8 x half> [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds half, ptr [[D]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 3
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds half, ptr [[TMP18]], i64 [[TMP8]]
; CHECK-NEXT: store <vscale x 8 x half> [[TMP16]], ptr [[TMP18]], align 2
; CHECK-NEXT: store <vscale x 8 x half> [[TMP17]], ptr [[TMP22]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]


@@ -218,6 +218,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 3
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i64 [[TMP7]], -1
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
@@ -227,12 +228,11 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]]
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP2]], 5
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]]
; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]]
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl i64 [[TMP8]], 3
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[DOTIDX3]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
@@ -240,9 +240,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC1]])
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC2]], 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP12]], i64 [[DOTIDX]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP3]]
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]


@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -S < %s -debug -prefer-predicate-over-epilogue=scalar-epilogue 2>%t | FileCheck %s
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
@@ -9,10 +10,15 @@ target triple = "aarch64-unknown-linux-gnu"
define void @induction_i7(ptr %dst) #0 {
; CHECK-LABEL: define void @induction_i7(
; CHECK-SAME: ptr [[DST:%.*]])
; CHECK: vector.ph:
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP40]], 2
@@ -26,21 +32,23 @@ define void @induction_i7(ptr %dst) #0 {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i7> zeroinitializer, [[TMP9]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i7> [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i7> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i7> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP23:%.*]] = zext <vscale x 2 x i7> [[VEC_IND]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP24:%.*]] = zext <vscale x 2 x i7> [[STEP_ADD]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP27:%.*]] = shl nuw i64 [[TMP26]], 1
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP27]]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP40]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP23]], ptr [[TMP21]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP24]], ptr [[TMP28]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i7> [[STEP_ADD]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP29]], label %middle.block, label %[[VECTOR_BODY]]
; CHECK-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
;
entry:
@@ -68,10 +76,15 @@ for.end: ; preds = %for.body
define void @induction_i3_zext(ptr %dst) #0 {
; CHECK-LABEL: define void @induction_i3_zext(
; CHECK-SAME: ptr [[DST:%.*]])
; CHECK: vector.ph:
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP40:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP40]], i64 0
; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP40]], 2
@@ -85,21 +98,23 @@ define void @induction_i3_zext(ptr %dst) #0 {
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i3> zeroinitializer, [[TMP9]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i3> [ [[INDUCTION]], %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i3> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i3> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 2 x i3> [[VEC_IND]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP20:%.*]] = zext <vscale x 2 x i3> [[STEP_ADD]] to <vscale x 2 x i64>
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i64 [[TMP24]], 1
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP25]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP40]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP19]], ptr [[TMP21]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP20]], ptr [[TMP26]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i3> [[STEP_ADD]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP27]], label %middle.block, label %[[VECTOR_BODY]]
; CHECK-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %for.body


@@ -16,7 +16,8 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP6]], 2
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8
@@ -28,9 +29,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 1
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP10]]
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP30]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> zeroinitializer, ptr [[TMP35]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]


@@ -28,7 +28,8 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 {
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP11]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]]
@@ -37,15 +38,12 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 2
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP13]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP19]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x i32>, ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP25]], align 4
; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD3]], ptr [[TMP30]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]
@@ -98,7 +96,8 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 {
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP7]], 4
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP11]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 [[N_MOD_VF]]
@@ -107,15 +106,12 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 2
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP23]]
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP13]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP19]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x i32>, ptr [[TMP24]], align 4
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP29]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP13]]
; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD]], ptr [[TMP25]], align 4
; CHECK-NEXT: store <vscale x 4 x i32> [[WIDE_LOAD3]], ptr [[TMP30]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP8]]


@@ -71,7 +71,8 @@ define void @always_taken(ptr noalias %p0, ptr noalias %p1, i1 %c0, i1 %c1) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP4]], 2
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1024, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i1> poison, i1 [[C1]], i64 0
@@ -83,15 +84,12 @@ define void @always_taken(ptr noalias %p0, ptr noalias %p1, i1 %c0, i1 %c1) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P0]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP8]], 1
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP10]], i64 [[TMP7]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP10]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> poison)
; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP20]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> poison)
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P1]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP13]], 1
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP9]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP9]], i64 [[TMP7]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> poison)
; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> poison)
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[WIDE_MASKED_LOAD4]], <vscale x 2 x i64> splat (i64 1)


@@ -43,7 +43,8 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP15]], 2
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP17]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP16]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -51,28 +52,20 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i64, ptr [[SRC_1]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[SRC_2]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 1
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP29]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP27]], i64 [[TMP17]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP27]], align 8
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 2 x i64>, ptr [[TMP30]], align 8
; CHECK-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP31]], i64 [[TMP33]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP31]], i64 [[TMP17]]
; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <vscale x 2 x i64>, ptr [[TMP31]], align 8
; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 2 x i64>, ptr [[TMP34]], align 8
; CHECK-NEXT: [[TMP35:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[WIDE_LOAD13]]
; CHECK-NEXT: [[TMP36:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD12]], [[WIDE_LOAD14]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[DST_1]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i64, ptr [[DST_2]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP43:%.*]] = shl nuw i64 [[TMP42]], 1
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP43]]
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[TMP41]], i64 [[TMP17]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP35]], ptr [[TMP41]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP36]], ptr [[TMP44]], align 8
; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 1
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP45]], i64 [[TMP47]]
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i64, ptr [[TMP45]], i64 [[TMP17]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP35]], ptr [[TMP45]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP36]], ptr [[TMP48]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]


@@ -11,7 +11,8 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 16
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 4
; CHECK-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP6]]
@@ -40,14 +41,10 @@ define void @simple_memset(i32 %val, ptr %ptr, i64 %n) #0 {
; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]]
; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP53:%.*]] = shl nuw i64 [[TMP52]], 2
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 3
; CHECK-NEXT: [[TMP56:%.*]] = mul nuw nsw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP59:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP59:%.*]] = mul nuw i64 [[TMP58]], 12
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]]
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP47]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP54]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK7]])
@@ -97,7 +94,8 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[UMAX]], [[TMP3]]
@@ -126,14 +124,10 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
; CHECK-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY4]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT15:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK9:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY5]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT16:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[COND_PTR:%.*]], i64 [[INDEX6]]
; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP53:%.*]] = shl nuw i64 [[TMP52]], 2
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP56:%.*]] = shl nuw i64 [[TMP55]], 3
; CHECK-NEXT: [[TMP56:%.*]] = mul nuw nsw i64 [[TMP1]], 2
; CHECK-NEXT: [[TMP59:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP59:%.*]] = mul nuw i64 [[TMP58]], 12
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr i32, ptr [[TMP47]], i64 [[TMP59]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP47]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
; CHECK-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP54]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK7]], <vscale x 4 x i32> poison)
@@ -148,15 +142,9 @@ define void @cond_memset(i32 %val, ptr noalias readonly %cond_ptr, ptr noalias %
; CHECK-NEXT: [[TMP71:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK8]], <vscale x 4 x i1> [[TMP63]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP72:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK9]], <vscale x 4 x i1> [[TMP64]], <vscale x 4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX6]]
; CHECK-NEXT: [[TMP74:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP75:%.*]] = shl nuw i64 [[TMP74]], 2
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP75]]
; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP78:%.*]] = shl nuw i64 [[TMP77]], 3
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP78]]
; CHECK-NEXT: [[TMP80:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP81:%.*]] = mul nuw i64 [[TMP80]], 12
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP81]]
; CHECK-NEXT: [[TMP76:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP56]]
; CHECK-NEXT: [[TMP82:%.*]] = getelementptr i32, ptr [[TMP65]], i64 [[TMP59]]
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP65]], <vscale x 4 x i1> [[TMP69]])
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP76]], <vscale x 4 x i1> [[TMP70]])
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP79]], <vscale x 4 x i1> [[TMP71]])


@@ -61,17 +61,14 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -182,22 +179,19 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -252,22 +246,19 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -328,22 +319,19 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP6]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP6]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP6]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
@@ -402,22 +390,19 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]


@@ -42,7 +42,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: br label [[VECTOR_PH1:%.*]]
; CHECK-UF4: vector.ph:
; CHECK-UF4-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 64
; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP61]], 16
; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 6
; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -61,14 +62,10 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 16 x i1> [ [[TMP18]], [[VECTOR_PH1]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[VECTOR_PH1]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
; CHECK-UF4-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4
; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP23]]
; CHECK-UF4-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 5
; CHECK-UF4-NEXT: [[TMP32:%.*]] = mul nuw nsw i64 [[TMP1]], 2
; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP1]]
; CHECK-UF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP32]]
; CHECK-UF4-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP34]], 48
; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP29]]
; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP20]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP24]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 16 x i8> poison)
@@ -79,15 +76,9 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-UF4-NEXT: [[TMP27:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD10]], splat (i8 3)
; CHECK-UF4-NEXT: [[TMP28:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD11]], splat (i8 3)
; CHECK-UF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-UF4-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 4
; CHECK-UF4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP38]]
; CHECK-UF4-NEXT: [[TMP40:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP41:%.*]] = shl nuw i64 [[TMP40]], 5
; CHECK-UF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP41]]
; CHECK-UF4-NEXT: [[TMP43:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], 48
; CHECK-UF4-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP44]]
; CHECK-UF4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP1]]
; CHECK-UF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP32]]
; CHECK-UF4-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP29]]
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP25]], ptr align 1 [[TMP35]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP26]], ptr align 1 [[TMP39]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK6]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr align 1 [[TMP42]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]])
@@ -109,7 +100,8 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-TF: vector.ph:
; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 32
; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 16
; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2
; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5
; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -124,17 +116,13 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK1:%.*]] = phi <vscale x 16 x i1> [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-TF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
; CHECK-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP11]]
; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[TMP10]]
; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP9]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP12]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 16 x i8> poison)
; CHECK-TF-NEXT: [[TMP13:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 3)
; CHECK-TF-NEXT: [[TMP14:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD2]], splat (i8 3)
; CHECK-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
; CHECK-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4
; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[TMP17]]
; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[TMP10]]
; CHECK-TF-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP13]], ptr align 1 [[TMP15]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-TF-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP14]], ptr align 1 [[TMP18]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK1]])
; CHECK-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
@@ -206,7 +194,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-UF4: vector.ph:
; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; CHECK-UF4-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-UF4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP4]], 3
; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]]
@@ -225,14 +214,10 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK7:%.*]] = phi <vscale x 2 x i1> [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 2 x i1> [ [[TMP14]], [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
; CHECK-UF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-UF4-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 1
; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP28]]
; CHECK-UF4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
; CHECK-UF4-NEXT: [[TMP21:%.*]] = mul nuw nsw i64 [[TMP1]], 2
; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw nsw i64 [[TMP1]], 3
; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP1]]
; CHECK-UF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP21]]
; CHECK-UF4-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 6
; CHECK-UF4-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP24]]
; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP15]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; CHECK-UF4-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP29]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK6]], <vscale x 2 x double> poison)
@@ -243,15 +228,9 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-UF4-NEXT: [[TMP18:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD10]], splat (double 3.000000e+00)
; CHECK-UF4-NEXT: [[TMP19:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD11]], splat (double 3.000000e+00)
; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-UF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1
; CHECK-UF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP33]]
; CHECK-UF4-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP36:%.*]] = shl nuw i64 [[TMP35]], 2
; CHECK-UF4-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP36]]
; CHECK-UF4-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-UF4-NEXT: [[TMP39:%.*]] = mul nuw i64 [[TMP38]], 6
; CHECK-UF4-NEXT: [[TMP40:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP39]]
; CHECK-UF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP1]]
; CHECK-UF4-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP21]]
; CHECK-UF4-NEXT: [[TMP40:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP24]]
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP16]], ptr align 8 [[TMP30]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP17]], ptr align 8 [[TMP34]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK6]])
; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr align 8 [[TMP37]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]])
@@ -276,7 +255,8 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-TF-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK-TF: vector.ph:
; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
; CHECK-TF-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP0]], 2
; CHECK-TF-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP10]], 2
; CHECK-TF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-TF-NEXT: [[TMP4:%.*]] = sub i64 [[N]], [[TMP3]]
@@ -291,17 +271,13 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK1:%.*]] = phi <vscale x 2 x i1> [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; CHECK-TF-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-TF-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP11]]
; CHECK-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; CHECK-TF-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP12]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK1]], <vscale x 2 x double> poison)
; CHECK-TF-NEXT: [[TMP13:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD]], splat (double 3.000000e+00)
; CHECK-TF-NEXT: [[TMP14:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD2]], splat (double 3.000000e+00)
; CHECK-TF-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-TF-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-TF-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 1
; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP17]]
; CHECK-TF-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP10]]
; CHECK-TF-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP13]], ptr align 8 [[TMP15]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-TF-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr align 8 [[TMP18]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK1]])
; CHECK-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]

@@ -23,6 +23,7 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP5]]
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
@@ -47,17 +48,13 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP9]], splat (i32 1)
; CHECK-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], splat (i32 1)
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP16]], 4
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 [[DOTIDX]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP15]], i64 [[TMP3]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP17]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], splat (i32 1)
; CHECK-NEXT: [[TMP19:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX5:%.*]] = shl nuw nsw i64 [[TMP21]], 4
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 [[DOTIDX5]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP20]], i64 [[TMP3]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP18]], ptr [[TMP20]], align 4
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP19]], ptr [[TMP22]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
@@ -134,6 +131,7 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP5]], 2
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNOT]]
@@ -148,16 +146,12 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP7]], 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP7]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD6]], splat (i32 1)
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX7:%.*]] = shl nuw nsw i64 [[TMP11]], 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP5]], i64 [[DOTIDX7]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP7]]
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP9]], ptr [[NEXT_GEP5]], align 4
; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]

@@ -394,8 +394,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP15]], 4
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[DOTIDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP17]], align 4

@@ -36,41 +36,38 @@ define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform , i6
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; INTERLEAVE-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 2
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 [[N]])
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[DOTIDX:%.*]] = shl i64 [[TMP8]], 4
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[DOTIDX]]
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP8]], i64 [[TMP1]]
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[TMP10:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: [[TMP11:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD3]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[DOTIDX5:%.*]] = shl i64 [[TMP13]], 4
; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP1]]
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr align 8 [[TMP12]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr align 8 [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr align 8 [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
; INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
; INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP5]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 [[TMP5]])
; INTERLEAVE-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; INTERLEAVE: for.cond.cleanup:
; INTERLEAVE-NEXT: ret void
;
@@ -122,41 +119,38 @@ define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i3
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
; INTERLEAVE-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 2
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 2
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP6]], i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP7]], i64 [[N]])
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[DOTIDX:%.*]] = shl i64 [[TMP8]], 4
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 [[DOTIDX]]
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP8]], i64 [[TMP1]]
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr align 8 [[TMP9]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[TMP10:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: [[TMP11:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD3]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[DOTIDX5:%.*]] = shl i64 [[TMP13]], 4
; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX5]]
; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP1]]
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP10]], ptr align 8 [[TMP12]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr align 8 [[TMP14]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; INTERLEAVE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 1
; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], [[TMP16]]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP17]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP11]], ptr align 8 [[TMP13]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]]
; INTERLEAVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 1
; INTERLEAVE-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP5]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP16]], i64 [[TMP5]])
; INTERLEAVE-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; INTERLEAVE: for.cond.cleanup:
; INTERLEAVE-NEXT: ret void
;
@@ -188,7 +182,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds nuw double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -212,7 +206,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; INTERLEAVE: pred.store.if:
; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR4:[0-9]+]]
; INTERLEAVE-NEXT: [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: store double [[TMP3]], ptr [[TMP4]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]]
@@ -222,7 +216,7 @@ define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64
; INTERLEAVE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP5]]
; INTERLEAVE-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8
; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR4]]
; INTERLEAVE-NEXT: [[TMP8:%.*]] = call double @foo(double [[TMP7]], i64 [[TMP5]]) #[[ATTR5]]
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP5]]
; INTERLEAVE-NEXT: store double [[TMP8]], ptr [[TMP9]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]]

@@ -175,14 +175,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL46:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX47:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL48:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX57:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL58:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL46]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX47]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL48]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL56]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX57]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL58]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1
; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1

@@ -96,11 +96,11 @@ define void @test(ptr %arr, i32 %len) {
; CHECK-NEXT: br i1 [[CMP_N28]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX29:%.*]] = phi double [ [[TMP28]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP21]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX35:%.*]] = phi double [ [[TMP28]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP21]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[REDX_05:%.*]] = phi double [ [[BC_MERGE_RDX29]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[REDX_05:%.*]] = phi double [ [[BC_MERGE_RDX35]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP29:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ADD]] = fadd fast double [[TMP29]], [[REDX_05]]

@@ -375,16 +375,15 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED-UF2: vector.body:
; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP5]]
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -498,16 +497,15 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED-UF2: vector.body:
; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP5]]
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -681,24 +679,21 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP6]], 4
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP9]], 2
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP7]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED-UF2: vector.body:
; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P]], i64 [[INDEX]]
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP10]]
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD4]], splat (i32 1)
; NOSTRIDED-UF2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P2]], i64 [[INDEX]]
; NOSTRIDED-UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; NOSTRIDED-UF2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP16]]
; NOSTRIDED-UF2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP9]]
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[TMP14]], align 4
; NOSTRIDED-UF2-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP17]], align 4
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
@@ -996,16 +991,15 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
; NOSTRIDED-UF2-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; NOSTRIDED-UF2: vector.ph:
; NOSTRIDED-UF2-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4
; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 2
; NOSTRIDED-UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; NOSTRIDED-UF2-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; NOSTRIDED-UF2-NEXT: br label [[VECTOR_BODY:%.*]]
; NOSTRIDED-UF2: vector.body:
; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP6]]
; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP5]]
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4
; NOSTRIDED-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
@@ -1351,9 +1345,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; NOSTRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; NOSTRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; NOSTRIDED-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT:%.*]], i64 [[INDEX]]
; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; NOSTRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP11]]
; NOSTRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP3]]
; NOSTRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8
; NOSTRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8
; NOSTRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
@@ -1435,9 +1427,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) {
; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; STRIDED-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[OUT:%.*]], i64 [[INDEX]]
; STRIDED-UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; STRIDED-UF2-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1
; STRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP11]]
; STRIDED-UF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i64, ptr [[TMP9]], i64 [[TMP3]]
; STRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], ptr [[TMP9]], align 8
; STRIDED-UF2-NEXT: store <vscale x 2 x i64> [[WIDE_MASKED_GATHER1]], ptr [[TMP12]], align 8
; STRIDED-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]

@@ -86,12 +86,12 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
; AUTO_VEC-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL12:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL13:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL12]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
@@ -225,11 +225,11 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; AUTO_VEC: [[SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]]
; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8
; AUTO_VEC-NEXT: [[I_NEXT]] = add i64 [[I]], 1
@@ -381,12 +381,12 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
; AUTO_VEC-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]:
; AUTO_VEC-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL16:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: [[BC_RESUME_VAL17:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ]
; AUTO_VEC-NEXT: br label %[[LOOP:.*]]
; AUTO_VEC: [[LOOP]]:
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ]
; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]]
; AUTO_VEC-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; AUTO_VEC-NEXT: [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]]

@@ -222,14 +222,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
@@ -453,14 +453,14 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: br i1 true, label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]]
; MAX-BW: [[VEC_EPILOG_SCALAR_PH]]:
; MAX-BW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; MAX-BW-NEXT: [[BC_MERGE_RDX13:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; MAX-BW-NEXT: [[BC_MERGE_RDX15:%.*]] = phi i32 [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP149]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; MAX-BW-NEXT: br label %[[FOR_BODY:.*]]
; MAX-BW: [[FOR_COND_CLEANUP]]:
; MAX-BW-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], %[[FOR_BODY]] ], [ [[TMP149]], %[[MIDDLE_BLOCK]] ], [ [[TMP170]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
; MAX-BW-NEXT: ret i32 [[ADD7_LCSSA]]
; MAX-BW: [[FOR_BODY]]:
; MAX-BW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
; MAX-BW-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD7]], %[[FOR_BODY]] ]
; MAX-BW-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
; MAX-BW-NEXT: [[TMP150:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]

@@ -44,16 +44,16 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) {
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP6:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[R:%.*]] = phi i64 [ [[TMP7:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]]
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP5]], [[R]]
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP7]] = add i64 [[TMP6]], [[R]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ [[TMP6]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP7]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[TMP8]]
;
entry:
br label %for.body

@@ -164,12 +164,12 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX9:%.*]] = phi float [ [[TMP29]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[MIN:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MIN_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[MAX:%.*]] = phi float [ [[BC_MERGE_RDX9]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV1]]
; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV1]]
; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[GEP_SRC_2]], align 4

@@ -1519,12 +1519,12 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL18]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8

@@ -11,16 +11,15 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x float> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x float>, ptr [[TMP9]], align 4
@@ -32,9 +31,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP4]]
; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4
; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
@@ -85,7 +82,8 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP6]], 2
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1600, [[TMP7]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1600, [[N_MOD_VF]]
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
@@ -94,17 +92,13 @@ define void @test2(ptr %a, ptr noalias %b) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw i64 [[TMP11]], 1
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP12]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP3]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 2 x float>, ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP15:%.*]] = fadd <vscale x 2 x float> [[WIDE_LOAD3]], splat (float 1.000000e+00)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 1
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP3]]
; CHECK-NEXT: store <vscale x 2 x float> [[TMP14]], ptr [[TMP16]], align 4
; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP19]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
@@ -173,17 +167,13 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP9]], <vscale x 2 x float> splat (float 2.300000e+01), <vscale x 2 x float> splat (float 4.200000e+01)
; CHECK-NEXT: [[PREDPHI1:%.*]] = select <vscale x 2 x i1> [[TMP10]], <vscale x 2 x float> splat (float 2.300000e+01), <vscale x 2 x float> splat (float 4.200000e+01)
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP5]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x float>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x float>, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = fmul <vscale x 2 x float> [[PREDPHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP16:%.*]] = fmul <vscale x 2 x float> [[PREDPHI1]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP19:%.*]] = shl nuw i64 [[TMP18]], 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP5]]
; CHECK-NEXT: store <vscale x 2 x float> [[TMP15]], ptr [[TMP17]], align 4
; CHECK-NEXT: store <vscale x 2 x float> [[TMP16]], ptr [[TMP20]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]

@@ -96,7 +96,8 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 8
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP10]], 4
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP12]], 2
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP11]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
@@ -109,9 +110,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = add nuw nsw i64 [[INDEX]], 1
; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]]
; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP21]]
; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP12]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP18]], align 4
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load <vscale x 4 x i32>, ptr [[TMP22]], align 4
; CHECK-VF4UF2-NEXT: [[TMP23:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[WIDE_LOAD]], i32 -1)
@@ -119,9 +118,7 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) {
; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[TMP23]]
; CHECK-VF4UF2-NEXT: [[TMP27:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD3]], [[TMP24]]
; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = shl nuw i64 [[TMP29]], 2
; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP30]]
; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[TMP12]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP26]], ptr [[TMP25]], align 4
; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP31]], align 4
; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
@@ -251,7 +248,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP3]], 4
; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP11]], 2
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
@@ -265,9 +263,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
; CHECK-VF4UF2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4UF2-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP11]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP10]], align 4
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD2]] = load <vscale x 4 x i32>, ptr [[TMP14]], align 4
; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vector.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[WIDE_LOAD]], i32 -1)
@@ -451,7 +447,8 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8
; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP11]], 4
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 2
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP12]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = add i64 1, [[N_VEC]]
@@ -467,11 +464,9 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4UF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-VF4UF2-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP22:%.*]] = shl nuw i64 [[TMP21]], 2
; CHECK-VF4UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i64 [[TMP22]]
; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i64 [[TMP13]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP19]], align 2, !alias.scope [[META6:![0-9]+]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD4]] = load <vscale x 4 x i16>, ptr [[TMP23]], align 2, !alias.scope [[META6]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD4]] = load <vscale x 4 x i16>, ptr [[TMP20]], align 2, !alias.scope [[META6]]
; CHECK-VF4UF2-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_LOAD]], i32 -1)
; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[WIDE_LOAD]], <vscale x 4 x i16> [[WIDE_LOAD4]], i32 -1)
; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = sitofp <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x double>
@@ -483,11 +478,9 @@ define void @recurrence_3(ptr nocapture readonly %a, ptr nocapture %b, i32 %n, f
; CHECK-VF4UF2-NEXT: [[TMP32:%.*]] = fsub fast <vscale x 4 x double> [[TMP26]], [[TMP30]]
; CHECK-VF4UF2-NEXT: [[TMP33:%.*]] = fsub fast <vscale x 4 x double> [[TMP27]], [[TMP31]]
; CHECK-VF4UF2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-VF4UF2-NEXT: [[TMP36:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP37:%.*]] = shl nuw i64 [[TMP36]], 2
; CHECK-VF4UF2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[TMP37]]
; CHECK-VF4UF2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[TMP13]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP32]], ptr [[TMP34]], align 8, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP33]], ptr [[TMP38]], align 8, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x double> [[TMP33]], ptr [[TMP35]], align 8, !alias.scope [[META9]], !noalias [[META6]]
; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
; CHECK-VF4UF2-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4UF2-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -809,7 +802,8 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
; CHECK-VF4UF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-VF4UF2: [[VECTOR_PH]]:
; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP7]], 2
; CHECK-VF4UF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-VF4UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
@@ -822,11 +816,9 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
; CHECK-VF4UF2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i16> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4UF2-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[INDEX]], 1
; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP12]]
; CHECK-VF4UF2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP15]], 2
; CHECK-VF4UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 [[TMP16]]
; CHECK-VF4UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 [[TMP7]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP13]], align 2, !alias.scope [[META17:![0-9]+]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load <vscale x 4 x i16>, ptr [[TMP17]], align 2, !alias.scope [[META17]]
; CHECK-VF4UF2-NEXT: [[WIDE_LOAD3]] = load <vscale x 4 x i16>, ptr [[TMP14]], align 2, !alias.scope [[META17]]
; CHECK-VF4UF2-NEXT: [[TMP18:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_LOAD]], i32 -1)
; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[WIDE_LOAD]], <vscale x 4 x i16> [[WIDE_LOAD3]], i32 -1)
; CHECK-VF4UF2-NEXT: [[TMP20:%.*]] = sext <vscale x 4 x i16> [[TMP18]] to <vscale x 4 x i32>
@@ -836,11 +828,9 @@ define void @sink_after(ptr %a, ptr %b, i64 %n) {
; CHECK-VF4UF2-NEXT: [[TMP24:%.*]] = mul nsw <vscale x 4 x i32> [[TMP22]], [[TMP20]]
; CHECK-VF4UF2-NEXT: [[TMP25:%.*]] = mul nsw <vscale x 4 x i32> [[TMP23]], [[TMP21]]
; CHECK-VF4UF2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4UF2-NEXT: [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-VF4UF2-NEXT: [[TMP29:%.*]] = shl nuw i64 [[TMP28]], 2
; CHECK-VF4UF2-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP29]]
; CHECK-VF4UF2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[TMP7]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP25]], ptr [[TMP30]], align 4, !alias.scope [[META20]], !noalias [[META17]]
; CHECK-VF4UF2-NEXT: store <vscale x 4 x i32> [[TMP25]], ptr [[TMP27]], align 4, !alias.scope [[META20]], !noalias [[META17]]
; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-VF4UF2-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4UF2-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]


@@ -29,17 +29,13 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[TMP10]], 4
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[DOTIDX]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP8]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP9]], align 8
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 2 x i64>, ptr [[TMP11]], align 8
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_IND]]
; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl i64 [[TMP15]], 4
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[DOTIDX3]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i64 [[TMP8]]
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP12]], ptr [[TMP14]], align 8
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP13]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
@@ -107,15 +103,13 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP9]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP2]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP8]], align 8
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 1 x i64>, ptr [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD]], [[VEC_IND]]
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <vscale x 1 x i64> [[WIDE_LOAD2]], [[STEP_ADD]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 [[TMP2]]
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP11]], ptr [[TMP13]], align 8
; CHECK-NEXT: store <vscale x 1 x i64> [[TMP12]], ptr [[TMP15]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]


@@ -13,16 +13,16 @@ define i32 @iv_live_out_wide(ptr %dst) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP4]], 4
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2
; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP5]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP9]]
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP10]], align 2
; CHECK-NEXT: store <vscale x 2 x i16> zeroinitializer, ptr [[TMP14]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]


@@ -42,24 +42,21 @@ define void @loop(i64 %N, ptr noalias %a, ptr noalias %b) {
; CHECKUF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECKUF2: [[VECTOR_PH]]:
; CHECKUF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 8
; CHECKUF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECKUF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP3]], 2
; CHECKUF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECKUF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECKUF2-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECKUF2: [[VECTOR_BODY]]:
; CHECKUF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECKUF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]]
; CHECKUF2-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-NEXT: [[TMP16:%.*]] = shl nuw i64 [[TMP8]], 2
; CHECKUF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 [[TMP16]]
; CHECKUF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 [[TMP3]]
; CHECKUF2-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x double>, ptr [[TMP7]], align 8
; CHECKUF2-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x double>, ptr [[TMP9]], align 8
; CHECKUF2-NEXT: [[TMP10:%.*]] = fadd <vscale x 4 x double> [[WIDE_LOAD]], splat (double 1.000000e+00)
; CHECKUF2-NEXT: [[TMP11:%.*]] = fadd <vscale x 4 x double> [[WIDE_LOAD3]], splat (double 1.000000e+00)
; CHECKUF2-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
; CHECKUF2-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
; CHECKUF2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP13]], 2
; CHECKUF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP17]]
; CHECKUF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP12]], i64 [[TMP3]]
; CHECKUF2-NEXT: store <vscale x 4 x double> [[TMP10]], ptr [[TMP12]], align 8
; CHECKUF2-NEXT: store <vscale x 4 x double> [[TMP11]], ptr [[TMP14]], align 8
; CHECKUF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]


@@ -12,7 +12,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP2]], 16
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP2]], 8
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP4]], 2
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 256, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 256, [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -23,9 +24,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-NEXT: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], splat (i32 255)
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], splat (i32 255)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 3
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP4]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[TMP9]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP8]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x i8>, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP26:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32>


@@ -69,7 +69,7 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: Successor(s): vector.body
; CHECK-EMPTY:
; CHECK-NEXT: vector.body:
; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<1>
; CHECK-NEXT: vp<[[VPTR2:%.]]> = vector-pointer ir<%A>, ir<8>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%A>
; CHECK-NEXT: WIDEN ir<%l>.1 = load vp<[[VPTR2]]>
; CHECK-NEXT: WIDEN ir<%add> = add nsw ir<%l>, ir<10>