diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 41eef2a36834..b7c31a8e3063 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4051,9 +4051,12 @@ public: ~VPScalarIVStepsRecipe() override = default; VPScalarIVStepsRecipe *clone() override { - return new VPScalarIVStepsRecipe(getOperand(0), getOperand(1), - getOperand(2), InductionOpcode, - getFastMathFlags(), getDebugLoc()); + auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1), + getOperand(2), InductionOpcode, + getFastMathFlags(), getDebugLoc()); + if (VPValue *StartIndex = getStartIndex()) + NewR->setStartIndex(StartIndex); + return NewR; } VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC) @@ -4080,6 +4083,14 @@ public: return getNumOperands() == 4 ? getOperand(3) : nullptr; } + /// Set or add the StartIndex operand. + void setStartIndex(VPValue *StartIndex) { + if (getNumOperands() == 4) + setOperand(3, StartIndex); + else + addOperand(StartIndex); + } + /// Returns true if the recipe only uses the first lane of operand \p Op. bool usesFirstLaneOnly(const VPValue *Op) const override { assert(is_contained(operands(), Op) && @@ -4087,6 +4098,8 @@ public: return true; } + Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; } + protected: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index e39e7113504f..f6965fcd814a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -4919,18 +4919,18 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) { vp_depth_first_shallow(Plan.getEntry())); auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly( vp_depth_first_shallow(LoopRegion->getEntry())); - // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and - // VPInstructions, excluding ones in replicate regions. Those are not - // materialized explicitly yet. Those vector users are still handled in - // VPReplicateRegion::execute(), via shouldPack(). + // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes, + // VPScalarIVStepsRecipe and VPInstructions, excluding ones in replicate + // regions. Those are not materialized explicitly yet. Those vector users are + // still handled in VPReplicateRegion::execute(), via shouldPack(). // TODO: materialize build vectors for replicating recipes in replicating // regions. for (VPBasicBlock *VPBB : concat(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R)) + if (!isa(&R)) continue; - auto *DefR = cast(&R); + auto *DefR = cast(&R); auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) { VPRegionBlock *ParentRegion = cast(U)->getRegion(); return !U->usesScalars(DefR) || ParentRegion != LoopRegion; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 222d5d6d92b8..6f7f4d9bc643 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -53,9 +53,6 @@ class UnrollState { /// Unroll replicate region \p VPR by cloning the region UF - 1 times. void unrollReplicateRegionByUF(VPRegionBlock *VPR); - /// Add a start index operand to \p Steps for \p Part. - void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part); - /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across /// all parts. void unrollRecipeByUF(VPRecipeBase &R); @@ -126,8 +123,9 @@ public: }; } // namespace -void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, - unsigned Part) { +static void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, + unsigned Part, VPlan &Plan, + VPTypeAnalysis &TypeInfo) { if (Part == 0) return; @@ -150,7 +148,7 @@ void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex, BaseIVTy, Steps->getDebugLoc()); - Steps->addOperand(StartIndex); + Steps->setStartIndex(StartIndex); } void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { @@ -167,7 +165,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) { for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) { remapOperands(&PartIR, Part); if (auto *Steps = dyn_cast(&PartIR)) - addStartIndexForScalarSteps(Steps, Part); + addStartIndexForScalarSteps(Steps, Part, Plan, TypeInfo); addRecipeForPart(&Part0R, &PartIR, Part); } @@ -372,7 +370,7 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { remapOperands(Copy, Part); if (auto *ScalarIVSteps = dyn_cast(Copy)) - addStartIndexForScalarSteps(ScalarIVSteps, Part); + addStartIndexForScalarSteps(ScalarIVSteps, Part, Plan, TypeInfo); // Add operand indicating the part to generate code for, to recipes still // requiring it. @@ -531,13 +529,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) { VPlanTransforms::removeDeadRecipes(Plan); } -/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or -/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar -/// definitions for operands of \DefR. +/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe, +/// VPInstruction or VPScalarIVStepsRecipe) for lane \p Lane. Use \p +/// Def2LaneDefs to look up scalar definitions for operands of \DefR. static VPValue * cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, VPSingleDefRecipe *DefR, VPLane Lane, const DenseMap> &Def2LaneDefs) { + assert((isa(DefR)) && + "DefR must be a VPReplicateRecipe, VPInstruction or " + "VPScalarIVStepsRecipe"); VPValue *Op; if (match(DefR, m_VPInstruction(m_VPValue(Op)))) { auto LaneDefs = Def2LaneDefs.find(Op); @@ -594,12 +595,46 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR, *RepR, RepR->getDebugLoc()); } else { - assert(isa(DefR) && - "DefR must be a VPReplicateRecipe or VPInstruction"); New = DefR->clone(); for (const auto &[Idx, Op] : enumerate(NewOps)) { New->setOperand(Idx, Op); } + if (auto *Steps = dyn_cast(New)) { + // Skip lane 0: an absent start index is implicitly zero. + unsigned KnownLane = Lane.getKnownLane(); + if (KnownLane != 0) { + VPTypeAnalysis TypeInfo(Plan); + Type *BaseIVTy = TypeInfo.inferScalarType(DefR->getOperand(0)); + + VPValue *StartIndex = Steps->getStartIndex(); + VPValue *LaneOffset; + unsigned AddOp; + VPIRFlags Flags; + if (BaseIVTy->isFloatingPointTy()) { + int SignedLane = static_cast(KnownLane); + if (!StartIndex && Steps->getInductionOpcode() == Instruction::FSub) + SignedLane = -SignedLane; + LaneOffset = + Plan.getOrAddLiveIn(ConstantFP::get(BaseIVTy, SignedLane)); + AddOp = Steps->getInductionOpcode(); + Flags = VPIRFlags(FastMathFlags()); + } else { + unsigned BaseIVBits = BaseIVTy->getScalarSizeInBits(); + LaneOffset = Plan.getConstantInt(APInt(BaseIVBits, KnownLane, + /*isSigned*/ false, + /*implicitTrunc*/ true)); + AddOp = Instruction::Add; + Flags = VPIRFlags(VPIRFlags::WrapFlagsTy(false, false)); + } + + if (StartIndex) { + VPBuilder LaneBuilder(DefR); + LaneOffset = + LaneBuilder.createNaryOp(AddOp, {StartIndex, LaneOffset}, Flags); + } + Steps->setStartIndex(LaneOffset); + } + } } New->insertBefore(DefR); return New; @@ -629,7 +664,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { SmallVector ToRemove; for (VPBasicBlock *VPBB : VPBBsToUnroll) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { - if (!isa(&R) || + if (!isa(&R) || (isa(&R) && cast(&R)->isSingleScalar()) || (isa(&R) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 1399649d1dbb..1dff1831c305 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -293,6 +293,13 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { break; } } + if (const auto *ScalarIVSteps = dyn_cast(&R)) { + unsigned NumOps = ScalarIVSteps->getNumOperands(); + if (NumOps != 3 && NumOps != 4) { + errs() << "VPScalarIVStepsRecipe must have 3 or 4 operands\n"; + return false; + } + } } auto *IRBB = dyn_cast(VPBB); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll index 60f7258ca7c2..77ea5f0e36c8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fold-tail-low-trip-count.ll @@ -16,11 +16,10 @@ define void @low_trip_count_small(i32 %x, ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0 ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2 ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3 @@ -28,7 +27,7 @@ define void @low_trip_count_small(i32 %x, ptr %dst) { ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -94,11 +93,10 @@ define ptr @low_trip_count_small_with_live_out(i32 %x, ptr %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr [[DST]], i64 0 ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2 ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP2]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP3]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP4]], i32 3 @@ -106,7 +104,7 @@ define ptr @low_trip_count_small_with_live_out(i32 %x, ptr %dst) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1 +; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[DST]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[PTR_NEXT]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index bf9c60a1761a..0a62ac980452 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -687,7 +687,6 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) { ; COST1-NEXT: br label %[[VECTOR_BODY:.*]] ; COST1: [[VECTOR_BODY]]: ; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; COST1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; COST1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; COST1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; COST1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 @@ -696,7 +695,6 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) { ; COST1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 6 ; COST1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 7 ; COST1-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 96 -; COST1-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 ; COST1-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 96 ; COST1-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 192 ; COST1-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 288 @@ -704,7 +702,7 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) { ; COST1-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 480 ; COST1-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 576 ; COST1-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 672 -; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]] +; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] ; COST1-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] ; COST1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] ; COST1-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]] @@ -712,7 +710,7 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) { ; COST1-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]] ; COST1-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]] ; COST1-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]] -; COST1-NEXT: [[TMP17:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP1]] +; COST1-NEXT: [[TMP17:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[INDEX]] ; COST1-NEXT: [[TMP18:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP2]] ; COST1-NEXT: [[TMP19:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP3]] ; COST1-NEXT: [[TMP20:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP4]] @@ -825,20 +823,18 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) { ; COST10-NEXT: br label %[[VECTOR_BODY:.*]] ; COST10: [[VECTOR_BODY]]: ; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; COST10-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; COST10-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; COST10-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; COST10-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 ; COST10-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 96 -; COST10-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 ; COST10-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 ; COST10-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 192 ; COST10-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 288 -; COST10-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] +; COST10-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] ; COST10-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] ; COST10-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]] ; COST10-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]] -; COST10-NEXT: [[TMP9:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP1]] +; COST10-NEXT: [[TMP9:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[INDEX]] ; COST10-NEXT: [[TMP10:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP2]] ; COST10-NEXT: [[TMP11:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP3]] ; COST10-NEXT: [[TMP12:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll index d08eb1265fc2..f63e506f3adb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll @@ -28,7 +28,6 @@ define float @_Z4testmm(i64 %size, i64 %offset) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP107:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP148:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP149:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 3 @@ -36,7 +35,7 @@ define float @_Z4testmm(i64 %size, i64 %offset) { ; CHECK-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP155:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[OFFSET]] +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[INDEX]], [[OFFSET]] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], [[OFFSET]] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP16]], [[OFFSET]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP24]], [[OFFSET]] @@ -76,25 +75,25 @@ define float @_Z4testmm(i64 %size, i64 %offset) { ; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP41]], i32 1 ; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP42]], i32 2 ; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP43]], i32 3 -; CHECK-NEXT: [[GEP_KERNEL:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[IV]] +; CHECK-NEXT: [[GEP_KERNEL:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[GEP_KERNEL]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_KERNEL]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP49]], align 4 ; CHECK-NEXT: [[TMP50:%.*]] = fmul fast <4 x float> [[TMP39]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP51:%.*]] = fmul fast <4 x float> [[TMP47]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP52]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP52]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP53]], align 4 ; CHECK-NEXT: [[TMP54:%.*]] = fmul fast <4 x float> [[TMP50]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP55:%.*]] = fmul fast <4 x float> [[TMP51]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP56]], align 4 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; CHECK-NEXT: [[TMP58:%.*]] = fmul fast <4 x float> [[TMP54]], [[WIDE_LOAD9]] ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast <4 x float> [[TMP55]], [[WIDE_LOAD10]] -; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[IV]] +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP60]], align 4 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP61]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index d18283f83179..53d3e9c96e9f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -20,9 +20,8 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ , [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 9be5953051e4..6f8dc73a44ca 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -437,7 +437,6 @@ define i32 @load_from_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 @@ -445,7 +444,7 @@ define i32 @load_from_pointer_induction(ptr %start, ptr %end) { ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP12]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP31]] ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] @@ -776,7 +775,6 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], 6 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 9 @@ -792,7 +790,7 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP1]], 39 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP1]], 42 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP1]], 45 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] @@ -898,7 +896,6 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX19]], 3 -; CHECK-NEXT: [[TMP90:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP92:%.*]] = add i64 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -906,7 +903,7 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n ; CHECK-NEXT: [[TMP95:%.*]] = add i64 [[OFFSET_IDX]], 15 ; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[OFFSET_IDX]], 18 ; CHECK-NEXT: [[TMP97:%.*]] = add i64 [[OFFSET_IDX]], 21 -; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP90]] +; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP91]] ; CHECK-NEXT: [[NEXT_GEP22:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP92]] ; CHECK-NEXT: [[NEXT_GEP23:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP93]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll index 2557ae55d2c8..1a87fadbb783 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-allocsize-not-equal-typesize.ll @@ -21,11 +21,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]] @@ -45,7 +44,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i24> [[TMP19]], i24 [[TMP16]], i32 3 ; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i24> [[TMP20]] to <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 5b4bb70e6a47..51c5c907da4f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -174,11 +174,10 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]] +; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16> @@ -186,7 +185,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index ad92b56218bb..36c539231d73 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -127,7 +127,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1: vector.body: ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -143,10 +142,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -216,7 +215,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -248,13 +246,13 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -375,7 +373,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -391,10 +388,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 021bed7e89c2..aa882a65d242 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -412,7 +412,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1: vector.body: ; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -428,10 +427,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -501,7 +500,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -533,13 +531,13 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -660,7 +658,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -676,10 +673,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> -; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] ; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index 337fcf451b2f..25e18692da8c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -354,7 +354,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -370,7 +369,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i64 [[N]], [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 [[N]], [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = sub nsw i64 [[N]], [[TMP3]] @@ -403,7 +402,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i64> [[TMP45]], i64 [[TMP30]], i32 14 ; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i64> [[TMP46]], i64 [[TMP31]], i32 15 ; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i64> [[TMP47]] to <16 x i8> -; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[TMP0]], [[TMP16]] +; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP16]] ; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP48]], i32 15 ; CHECK-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1 @@ -419,7 +418,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX2]], 0 ; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX2]], 1 ; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX2]], 2 ; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX2]], 3 @@ -427,7 +425,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX2]], 5 ; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX2]], 6 ; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX2]], 7 -; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[TMP53]] +; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[INDEX2]] ; CHECK-NEXT: [[TMP62:%.*]] = sub nsw i64 [[N]], [[TMP54]] ; CHECK-NEXT: [[TMP63:%.*]] = sub nsw i64 [[N]], [[TMP55]] ; CHECK-NEXT: [[TMP64:%.*]] = sub nsw i64 [[N]], [[TMP56]] @@ -444,7 +442,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; CHECK-NEXT: [[TMP75:%.*]] = insertelement <8 x i64> [[TMP74]], i64 [[TMP67]], i32 6 ; CHECK-NEXT: [[TMP76:%.*]] = insertelement <8 x i64> [[TMP75]], i64 [[TMP68]], i32 7 ; CHECK-NEXT: [[TMP77:%.*]] = trunc <8 x i64> [[TMP76]] to <8 x i8> -; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[TMP53]], [[TMP61]] +; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[INDEX2]], [[TMP61]] ; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]] ; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i8> [[TMP77]], i32 7 ; CHECK-NEXT: store i8 [[TMP80]], ptr [[TMP79]], align 1 @@ -478,7 +476,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: br label [[VECTOR_BODY:%.*]] ; IC2: vector.body: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -510,7 +507,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; IC2-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]] +; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]] ; IC2-NEXT: [[TMP33:%.*]] = sub nsw i64 [[N]], [[TMP1]] ; IC2-NEXT: [[TMP34:%.*]] = sub nsw i64 [[N]], [[TMP2]] ; IC2-NEXT: [[TMP35:%.*]] = sub nsw i64 [[N]], [[TMP3]] @@ -576,7 +573,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP95:%.*]] = insertelement <16 x i64> [[TMP94]], i64 [[TMP79]], i32 15 ; IC2-NEXT: [[TMP96:%.*]] = trunc <16 x i64> [[TMP63]] to <16 x i8> ; IC2-NEXT: [[TMP97:%.*]] = trunc <16 x i64> [[TMP95]] to <16 x i8> -; IC2-NEXT: [[TMP98:%.*]] = add i64 [[TMP0]], [[TMP32]] +; IC2-NEXT: [[TMP98:%.*]] = add i64 [[INDEX]], [[TMP32]] ; IC2-NEXT: [[TMP99:%.*]] = add i64 [[TMP16]], [[TMP64]] ; IC2-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP98]] ; IC2-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP99]] @@ -596,7 +593,6 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; IC2: vec.epilog.vector.body: ; IC2-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; IC2-NEXT: [[TMP105:%.*]] = add i64 [[INDEX1]], 0 ; IC2-NEXT: [[TMP106:%.*]] = add i64 [[INDEX1]], 1 ; IC2-NEXT: [[TMP107:%.*]] = add i64 [[INDEX1]], 2 ; IC2-NEXT: [[TMP108:%.*]] = add i64 [[INDEX1]], 3 @@ -604,7 +600,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP110:%.*]] = add i64 [[INDEX1]], 5 ; IC2-NEXT: [[TMP111:%.*]] = add i64 [[INDEX1]], 6 ; IC2-NEXT: [[TMP112:%.*]] = add i64 [[INDEX1]], 7 -; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[TMP105]] +; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[INDEX1]] ; IC2-NEXT: [[TMP114:%.*]] = sub nsw i64 [[N]], [[TMP106]] ; IC2-NEXT: [[TMP115:%.*]] = sub nsw i64 [[N]], [[TMP107]] ; IC2-NEXT: [[TMP116:%.*]] = sub nsw i64 [[N]], [[TMP108]] @@ -621,7 +617,7 @@ define void @test_loop2(i64 %n, ptr %dst) { ; IC2-NEXT: [[TMP127:%.*]] = insertelement <8 x i64> [[TMP126]], i64 [[TMP119]], i32 6 ; IC2-NEXT: [[TMP128:%.*]] = insertelement <8 x i64> [[TMP127]], i64 [[TMP120]], i32 7 ; IC2-NEXT: [[TMP129:%.*]] = trunc <8 x i64> [[TMP128]] to <8 x i8> -; IC2-NEXT: [[TMP130:%.*]] = add i64 [[TMP105]], [[TMP113]] +; IC2-NEXT: [[TMP130:%.*]] = add i64 [[INDEX1]], [[TMP113]] ; IC2-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP130]] ; IC2-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP129]], i32 7 ; IC2-NEXT: store i8 [[TMP132]], ptr [[TMP131]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll index a6d956ccac3d..2c82cae58903 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll @@ -234,7 +234,6 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 @@ -242,7 +241,7 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i128, ptr [[DST]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i128, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i128, ptr [[DST]], i64 [[TMP19]] ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> [[TMP26]], ptr [[TMP6]], i32 1 @@ -266,7 +265,7 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK-NEXT: store ptr null, ptr [[TMP10]], align 8 ; CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 ; CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 4 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 6 @@ -590,9 +589,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]] +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> @@ -602,10 +600,10 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]] ; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72 @@ -710,7 +708,6 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -726,7 +723,7 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index b6ddf1d3e7a9..0f18b29b25d6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -554,9 +554,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]] +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> @@ -566,10 +565,10 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src. ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]] -; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]] ; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72 @@ -674,7 +673,6 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -690,7 +688,7 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll index 6d0c55b1d246..267e410de1f2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll @@ -736,12 +736,11 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2: [[VECTOR_BODY]]: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] +; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] ; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i32>, ptr [[TMP2]], align 8 ; VF2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[WIDE_VEC]], <4 x i32> poison, <2 x i32> -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0 +; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0 ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0 ; VF2-NEXT: [[WIDE_VEC1:%.*]] = load <6 x i32>, ptr [[TMP8]], align 8 ; VF2-NEXT: [[TMP13:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> @@ -751,7 +750,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1 ; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8 ; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1 +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1 ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1 ; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]] ; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0 @@ -773,14 +772,13 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF4-NEXT: br label %[[VECTOR_BODY:.*]] ; VF4: [[VECTOR_BODY]]: ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]] +; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] ; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8 ; VF4-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0 +; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0 ; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0 ; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0 ; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0 @@ -796,7 +794,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) { ; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8 ; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8 ; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8 -; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1 +; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1 ; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1 ; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1 ; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll index 4761cb0d63de..e64b7adce36c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll @@ -13,18 +13,17 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -84,14 +83,13 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[C]], align 4 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16> ; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP6]] @@ -99,7 +97,7 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index c50fdc66e7cc..b5825d9af216 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -745,11 +745,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64: [[VECTOR_BODY]]: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 ; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1 +; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 ; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 @@ -796,11 +795,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV32: [[VECTOR_BODY]]: ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV32-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 ; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1 +; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 ; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 @@ -847,7 +845,6 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64-UF2: [[VECTOR_BODY]]: ; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; RV64-UF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 ; RV64-UF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 @@ -855,7 +852,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { ; RV64-UF2-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -5 ; RV64-UF2-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -6 ; RV64-UF2-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -7 -; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP0]], -1 +; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP1]], -1 ; RV64-UF2-NEXT: [[TMP10:%.*]] = add nsw i64 [[TMP2]], -1 ; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP3]], -1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index a3b91e7ff405..b1c1ca86217c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -586,9 +586,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] ; FIXEDLEN: [[VECTOR_BODY]]: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 5 -; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6 ; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7 ; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index ee84ef243570..c665c0754b24 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -20,13 +20,14 @@ define void @func_21() { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] @@ -45,7 +46,7 @@ define void @func_21() { ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/widen-call-with-intrinsic-or-libfunc.ll index c71628393f98..f0e97253fd6e 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -109,15 +109,14 @@ define void @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double> ; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @__simd_sin_v2f64(<2 x double> [[TMP4]]) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[TMP1]] ; CHECK-NEXT: store double [[TMP8]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll index ac5acac85598..63b0da8101b9 100644 --- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-predicate-switch.ll @@ -20,11 +20,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; CHECK-EMPTY: ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ] -; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2> -; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0> -; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]> -; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1> -; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]> +; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1> +; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]> +; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]> ; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1 ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]> ; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12> diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 7bea9e07d722..3fbacb13e446 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -66,7 +66,6 @@ define void @PR31671(float %x, ptr %d) #0 { ; FORCE: [[VECTOR_BODY]]: ; FORCE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCE-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5 -; FORCE-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FORCE-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5 ; FORCE-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10 ; FORCE-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15 @@ -74,7 +73,7 @@ define void @PR31671(float %x, ptr %d) #0 { ; FORCE-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25 ; FORCE-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30 ; FORCE-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35 -; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[TMP0]] +; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[OFFSET_IDX]] ; FORCE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP2]] ; FORCE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP4]] ; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP6]] @@ -90,7 +89,7 @@ define void @PR31671(float %x, ptr %d) #0 { ; FORCE-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC2]] ; FORCE-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC4]] ; FORCE-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC6]] -; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP0]] +; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[OFFSET_IDX]] ; FORCE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP1]] ; FORCE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP2]] ; FORCE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 1bd308855b9e..58b21f9bc816 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -92,7 +92,6 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP121:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP122:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 32 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 32 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96 @@ -108,7 +107,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 416 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 448 ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 480 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] @@ -156,7 +155,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x float> [[TMP63]], float [[TMP60]], i32 1 ; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x float> [[TMP64]], float [[TMP61]], i32 2 ; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x float> [[TMP65]], float [[TMP62]], i32 3 -; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] @@ -238,11 +237,10 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP125]], %[[VEC_EPILOG_PH]] ], [ [[TMP155:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX12:%.*]] = mul i64 [[INDEX10]], 32 -; CHECK-NEXT: [[TMP126:%.*]] = add i64 [[OFFSET_IDX12]], 0 ; CHECK-NEXT: [[TMP127:%.*]] = add i64 [[OFFSET_IDX12]], 32 ; CHECK-NEXT: [[TMP128:%.*]] = add i64 [[OFFSET_IDX12]], 64 ; CHECK-NEXT: [[TMP129:%.*]] = add i64 [[OFFSET_IDX12]], 96 -; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP126]] +; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET_IDX12]] ; CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP127]] ; CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP128]] ; CHECK-NEXT: [[TMP133:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP129]] @@ -254,7 +252,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x float> [[TMP138]], float [[TMP135]], i32 1 ; CHECK-NEXT: [[TMP140:%.*]] = insertelement <4 x float> [[TMP139]], float [[TMP136]], i32 2 ; CHECK-NEXT: [[TMP141:%.*]] = insertelement <4 x float> [[TMP140]], float [[TMP137]], i32 3 -; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP126]] +; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX12]] ; CHECK-NEXT: [[TMP143:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP127]] ; CHECK-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP128]] ; CHECK-NEXT: [[TMP145:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP129]] @@ -432,11 +430,10 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 40 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 40 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 80 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 120 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]] ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]] ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index f20bf95af4b5..e66771188996 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -287,12 +287,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 0 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 0, 1 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1, 1 ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 2, 1 ; CHECK-NEXT: [[TMP9:%.*]] = sub i64 3, 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]] @@ -300,11 +298,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3 -; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[PTRS]], align 8 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 4 [[TMP10]], <4 x i1> , <4 x float> poison), !invariant.load [[META0]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> , <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 0 -; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4 +; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 84579d97b38e..86444d3354fe 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -354,7 +354,6 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3 @@ -362,7 +361,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll index 83459e386be0..8d89556fcfd7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll @@ -27,7 +27,6 @@ define float @_Z4testmm(i64 %size, i64 %offset) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP148:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP149:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -35,7 +34,7 @@ define float @_Z4testmm(i64 %size, i64 %offset) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP0]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[OFFSET:%.*]] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP1]], [[OFFSET]] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP2]], [[OFFSET]] ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], [[OFFSET]] @@ -75,25 +74,25 @@ define float @_Z4testmm(i64 %size, i64 %offset) { ; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP41]], i32 1 ; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP42]], i32 2 ; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP43]], i32 3 -; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP48]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP49]], align 4 ; CHECK-NEXT: [[TMP50:%.*]] = fmul fast <4 x float> [[TMP39]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP51:%.*]] = fmul fast <4 x float> [[TMP47]], [[WIDE_LOAD6]] -; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP52]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP52]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP53]], align 4 ; CHECK-NEXT: [[TMP54:%.*]] = fmul fast <4 x float> [[TMP50]], [[WIDE_LOAD7]] ; CHECK-NEXT: [[TMP55:%.*]] = fmul fast <4 x float> [[TMP51]], [[WIDE_LOAD8]] -; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP56]], align 4 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; CHECK-NEXT: [[TMP58:%.*]] = fmul fast <4 x float> [[TMP54]], [[WIDE_LOAD9]] ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast <4 x float> [[TMP55]], [[WIDE_LOAD10]] -; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP60]], align 4 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP61]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 0bac10a41640..aff665dad85a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -151,9 +151,10 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1 +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -166,7 +167,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -266,9 +267,10 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1 +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -281,7 +283,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) { ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[TMP0]], i32 1 +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[OFFSET_IDX]], i32 1 ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -368,9 +370,10 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1 +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -383,7 +386,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -469,9 +472,10 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1 +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -484,7 +488,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -570,9 +574,10 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ] ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16 -; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1 +; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]] ; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 ; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 @@ -585,7 +590,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal ; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; FVW2: pred.store.if: -; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]] +; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 ; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4 ; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]] @@ -779,9 +784,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; FVW2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX]] ; FVW2-NEXT: [[OFFSET_IDX9:%.*]] = mul i64 [[INDEX]], 64 -; FVW2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX9]], 0 ; FVW2-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX9]], 64 -; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP17]] +; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[OFFSET_IDX9]] ; FVW2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]] ; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] ; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP21]], align 4, !alias.scope [[META8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 04143338df98..79538f7f2c68 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -255,7 +255,6 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -271,7 +270,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index a8cfe9ce1cc2..76b3ae77b474 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -110,9 +110,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX1]], 5 ; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP10]], 5 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP12]] @@ -120,7 +119,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr ; CHECK-NEXT: [[TMP15:%.*]] = or disjoint i64 [[TMP12]], 16 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX1]], 4 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP13]], align 4, !alias.scope [[META3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll index 14fb2a76eb75..fbdc342f0b7f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-ptradd-with-replicated-operand.ll @@ -16,7 +16,6 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 24 @@ -32,7 +31,7 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 104 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 112 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 120 -; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP2]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll index 67cd25d91b4b..d20723d63647 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-hoist-load-across-store.ll @@ -18,17 +18,15 @@ define void @pr63602_1(ptr %arr) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 6 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 9 -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP4]], 4 @@ -40,7 +38,7 @@ define void @pr63602_1(ptr %arr) { ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]] @@ -48,7 +46,7 @@ define void @pr63602_1(ptr %arr) { ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP23]], align 4 ; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4 ; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP4]], 2 @@ -154,17 +152,15 @@ define void @pr63602_2(ptr %arr) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 3 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 6 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 9 -; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP4]], 4 @@ -176,7 +172,7 @@ define void @pr63602_2(ptr %arr) { ; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]] @@ -184,7 +180,7 @@ define void @pr63602_2(ptr %arr) { ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP23]], align 4 ; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4 ; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP4]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll index d245caaeee30..e8bf299e7857 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll @@ -100,13 +100,12 @@ define i64 @reverse_load_liveout_only(ptr %A) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 17, [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP12]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 934dd3eccb9b..8f347ebf8701 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -106,7 +106,6 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -122,7 +121,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -170,7 +169,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -241,7 +240,6 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP97:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP98:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP99:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -257,7 +255,7 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -396,23 +394,38 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP145:%.*]], [[PRED_LOAD_CONTINUE33]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP146:%.*]], [[PRED_LOAD_CONTINUE33]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP147:%.*]], [[PRED_LOAD_CONTINUE33]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i64> [[TMP64]], i64 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP74:%.*]] = insertelement <4 x i64> [[TMP69]], i64 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i64> [[TMP74]], i64 [[TMP3]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP89:%.*]] = insertelement <4 x i64> [[TMP84]], i64 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i64> [[TMP89]], i64 [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP99:%.*]] = insertelement <4 x i64> [[TMP94]], i64 [[TMP7]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11 +; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i64> poison, i64 [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i64> [[TMP104]], i64 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP114:%.*]] = insertelement <4 x i64> [[TMP109]], i64 [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP119:%.*]] = insertelement <4 x i64> [[TMP114]], i64 [[TMP11]], i32 3 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP124:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i64> [[TMP124]], i64 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP134:%.*]] = insertelement <4 x i64> [[TMP129]], i64 [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x i64> [[TMP134]], i64 [[TMP15]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -462,7 +475,7 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] @@ -653,7 +666,6 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 @@ -669,7 +681,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]] @@ -717,7 +729,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 @@ -812,7 +824,6 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3 @@ -828,7 +839,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -876,7 +887,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -995,7 +1006,6 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -1011,7 +1021,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1059,7 +1069,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -1166,7 +1176,6 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1182,7 +1191,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1230,7 +1239,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1297,7 +1306,6 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1313,7 +1321,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1361,7 +1369,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1428,7 +1436,6 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1444,7 +1451,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1492,7 +1499,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1568,7 +1575,6 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 @@ -1584,7 +1590,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]] @@ -1632,7 +1638,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 ; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 @@ -1728,7 +1734,6 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1744,7 +1749,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1792,7 +1797,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -1860,7 +1865,6 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1876,7 +1880,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -1924,7 +1928,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -2002,7 +2006,6 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -2018,7 +2021,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2066,7 +2069,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 ; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 ; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 @@ -2142,7 +2145,6 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -2158,7 +2160,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 39 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 42 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 45 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2206,7 +2208,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2331,7 +2333,6 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP56:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 12 @@ -2339,7 +2340,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 20 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 28 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2363,7 +2364,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i1> [[TMP28]], i1 [[TMP25]], i32 1 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i1> [[TMP29]], i1 [[TMP26]], i32 2 ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i1> [[TMP30]], i1 [[TMP27]], i32 3 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2460,7 +2461,6 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15 @@ -2476,7 +2476,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2524,7 +2524,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2650,7 +2650,6 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -2666,7 +2665,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] @@ -2714,7 +2713,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] @@ -2842,7 +2841,6 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP130:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP131:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -2858,7 +2856,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP1]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP3]], 2 @@ -2874,7 +2872,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP13]], 2 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP14]], 2 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP15]], 2 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index 1350e40c77e6..1396029b20c9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -21,9 +21,6 @@ define void @foo(ptr %ptr, ptr %ptr.2) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index 34c54de2140c..3cdffa8284ba 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -11,9 +11,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1 @@ -35,7 +34,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2) diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index e91a0b6b5cca..a4107d5d46fa 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -58,7 +58,6 @@ define void @test(ptr %p) { ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) -; VEC-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3 @@ -66,7 +65,7 @@ define void @test(ptr %p) { ; VEC-NEXT: [[TMP32:%.*]] = add i64 [[INDEX]], 5 ; VEC-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6 ; VEC-NEXT: [[TMP34:%.*]] = add i64 [[INDEX]], 7 -; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[TMP15]], 1 +; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP20:%.*]] = shl i64 [[TMP16]], 1 ; VEC-NEXT: [[TMP21:%.*]] = shl i64 [[TMP17]], 1 ; VEC-NEXT: [[TMP22:%.*]] = shl i64 [[TMP18]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicated-udiv.ll b/llvm/test/Transforms/LoopVectorize/X86/predicated-udiv.ll index ceade1bdbc0b..e19d3a4ba0fc 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/predicated-udiv.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/predicated-udiv.ll @@ -70,7 +70,6 @@ define void @simplify_udiv_4_in_replicate_region2(i8 %arg, ptr noalias %src, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE29:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 @@ -78,13 +77,13 @@ define void @simplify_udiv_4_in_replicate_region2(i8 %arg, ptr noalias %src, ptr ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD1]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index 957473775e25..91b30fa50e29 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -28,7 +28,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; I64-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4) ; I64-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; I64-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0 ; I64-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; I64-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 2 ; I64-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 3 @@ -64,7 +63,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1 ; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2 ; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3 -; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]] +; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]] ; I64-NEXT: [[TMP25:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]] ; I64-NEXT: [[TMP26:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]] ; I64-NEXT: [[TMP27:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP7]] @@ -134,7 +133,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64: [[VEC_EPILOG_VECTOR_BODY]]: ; I64-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; I64-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; I64-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 0 ; I64-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 1 ; I64-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 2 ; I64-NEXT: [[TMP78:%.*]] = add i32 [[INDEX4]], 3 @@ -143,7 +141,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP79]], i32 1 ; I64-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP79]], i32 2 ; I64-NEXT: [[TMP91:%.*]] = extractelement <4 x double> [[TMP79]], i32 3 -; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]] +; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]] ; I64-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]] ; I64-NEXT: [[TMP86:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]] ; I64-NEXT: [[TMP93:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]] @@ -184,7 +182,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; I32-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4) ; I32-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 ; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2 ; I32-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3 @@ -220,7 +217,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1 ; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2 ; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3 -; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]] +; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]] ; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]] ; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]] ; I32-NEXT: [[TMP18:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]] @@ -290,7 +287,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32: [[VEC_EPILOG_VECTOR_BODY]]: ; I32-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; I32-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; I32-NEXT: [[TMP74:%.*]] = add i32 [[INDEX4]], 0 ; I32-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 1 ; I32-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 2 ; I32-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 3 @@ -299,7 +295,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP78]], i32 1 ; I32-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP78]], i32 2 ; I32-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP78]], i32 3 -; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]] +; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]] ; I32-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]] ; I32-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]] ; I32-NEXT: [[TMP92:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]] @@ -352,11 +348,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I64-NEXT: br label %[[VECTOR_BODY:.*]] ; I64: [[VECTOR_BODY]]: ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]] +; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] ; I64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]] ; I64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]] ; I64-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] @@ -364,7 +359,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 ; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8 ; I64-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8 -; I64-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1 +; I64-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1 ; I64-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1 ; I64-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1 ; I64-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1 @@ -399,11 +394,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I32-NEXT: br label %[[VECTOR_BODY:.*]] ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]] +; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]] ; I32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]] ; I32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]] ; I32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]] @@ -411,7 +405,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n) ; I32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 ; I32-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8 ; I32-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8 -; I32-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1 +; I32-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1 ; I32-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1 ; I32-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1 ; I32-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1 @@ -708,7 +702,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: br label %[[VECTOR_BODY:.*]] ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 @@ -716,7 +709,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5 ; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6 ; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7 -; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1 +; I32-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1 ; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1 ; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1 @@ -790,7 +783,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias % ; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4 ; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4 ; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4 -; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] ; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]] ; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]] @@ -856,7 +849,6 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s ; I64-NEXT: br label %[[VECTOR_BODY:.*]] ; I64: [[VECTOR_BODY]]: ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -864,7 +856,7 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s ; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]] +; I64-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], [[OFFSET]] ; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]] ; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]] ; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]] @@ -973,11 +965,10 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s ; I32-NEXT: br label %[[VECTOR_BODY:.*]] ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]] +; I32-NEXT: [[TMP4:%.*]] = mul i64 [[INDEX]], [[OFFSET]] ; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]] ; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]] ; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]] @@ -1096,8 +1087,6 @@ define void @replicated_load_wide_store_derived_iv_zext_and(ptr noalias %src, pt ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; I64-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; I64-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], [[STEP]] -; I64-NEXT: [[TMP15:%.*]] = mul i32 0, [[STEP]] -; I64-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], [[TMP15]] ; I64-NEXT: [[TMP17:%.*]] = mul i32 1, [[STEP]] ; I64-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], [[TMP17]] ; I64-NEXT: [[TMP19:%.*]] = mul i32 2, [[STEP]] @@ -1112,7 +1101,7 @@ define void @replicated_load_wide_store_derived_iv_zext_and(ptr noalias %src, pt ; I64-NEXT: [[TMP28:%.*]] = add i32 [[OFFSET_IDX]], [[TMP27]] ; I64-NEXT: [[TMP29:%.*]] = mul i32 7, [[STEP]] ; I64-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], [[TMP29]] -; I64-NEXT: [[TMP31:%.*]] = zext i32 [[TMP16]] to i64 +; I64-NEXT: [[TMP31:%.*]] = zext i32 [[OFFSET_IDX]] to i64 ; I64-NEXT: [[TMP32:%.*]] = zext i32 [[TMP18]] to i64 ; I64-NEXT: [[TMP33:%.*]] = zext i32 [[TMP20]] to i64 ; I64-NEXT: [[TMP34:%.*]] = zext i32 [[TMP22]] to i64 @@ -1220,8 +1209,6 @@ define void @replicated_load_wide_store_derived_iv_zext_and2(ptr noalias %dst, p ; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; I64-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; I64-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], [[STEP]] -; I64-NEXT: [[TMP7:%.*]] = mul i32 0, [[STEP]] -; I64-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], [[TMP7]] ; I64-NEXT: [[TMP9:%.*]] = mul i32 1, [[STEP]] ; I64-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], [[TMP9]] ; I64-NEXT: [[TMP11:%.*]] = mul i32 2, [[STEP]] @@ -1236,7 +1223,7 @@ define void @replicated_load_wide_store_derived_iv_zext_and2(ptr noalias %dst, p ; I64-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], [[TMP19]] ; I64-NEXT: [[TMP21:%.*]] = mul i32 7, [[STEP]] ; I64-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], [[TMP21]] -; I64-NEXT: [[TMP23:%.*]] = zext i32 [[TMP8]] to i64 +; I64-NEXT: [[TMP23:%.*]] = zext i32 [[OFFSET_IDX]] to i64 ; I64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP10]] to i64 ; I64-NEXT: [[TMP25:%.*]] = zext i32 [[TMP12]] to i64 ; I64-NEXT: [[TMP26:%.*]] = zext i32 [[TMP14]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll index 7f7d0be09960..7811693444dd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll @@ -26,7 +26,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -58,7 +57,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 @@ -66,7 +65,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]] @@ -193,13 +192,12 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 ; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 -; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] +; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]] ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]] -; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] +; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] ; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] @@ -257,7 +255,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ] -; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -289,7 +286,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29 ; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 -; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]] +; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]] ; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8 ; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16 ; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24 @@ -297,7 +294,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]] ; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]] +; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]] @@ -424,13 +421,12 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u ; MAX-BW: [[VEC_EPILOG_VECTOR_BODY]]: ; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0 ; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1 ; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2 ; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3 -; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]] +; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]] ; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]] -; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]] +; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]] ; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]] @@ -507,7 +503,6 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -515,7 +510,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> @@ -529,7 +524,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP19]], i32 5 ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i8> [[TMP19]], i32 6 ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP19]], i32 7 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]] @@ -562,7 +557,6 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW: [[VECTOR_BODY]]: ; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -578,7 +572,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 ; MAX-BW-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 ; MAX-BW-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 -; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]] +; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]] ; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4 ; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> ; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> @@ -600,7 +594,7 @@ define void @test(ptr %A, ptr noalias %B) #0 { ; MAX-BW-NEXT: [[TMP65:%.*]] = extractelement <16 x i8> [[TMP35]], i32 13 ; MAX-BW-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[TMP35]], i32 14 ; MAX-BW-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP35]], i32 15 -; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]] +; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]] ; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]] ; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]] ; MAX-BW-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index df3a24ba004a..90469ebc1cc7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -198,9 +198,6 @@ define void @uniform_store_varying_value(ptr align(4) %addr) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 12 -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP0]], 13 -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP0]], 14 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], 15 ; CHECK-NEXT: store i32 [[TMP7]], ptr [[ADDR:%.*]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll index 38617d25bfd7..d4335961e19b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll @@ -16,11 +16,10 @@ define void @test(ptr %A) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var-width-4.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var-width-4.ll index cdfe9c30d10a..0b50c856d2c5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var-width-4.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var-width-4.ll @@ -11,15 +11,13 @@ define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP1:%.*]] = select i1 false, i64 1, i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 true, i64 1, i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i64 1, i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, i64 1, i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[FOO]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP8]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var.ll index cc7c16f6f5ee..fdb5a50eca83 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-single-bit-ind-var.ll @@ -12,11 +12,9 @@ define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP0:%.*]] = select i1 false, i64 1, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = select i1 true, i64 1, i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[FOO]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll index d29719de79ff..05abe2cc2fa3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll @@ -30,12 +30,11 @@ define void @example() { ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; FORCED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; FORCED-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[VEC_IND]] to <2 x x86_fp80> ; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 0 ; FORCED-NEXT: [[TMP6:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 1 -; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP0]] +; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[INDEX]] ; FORCED-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP1]] ; FORCED-NEXT: store x86_fp80 [[TMP5]], ptr [[TMP3]], align 16 ; FORCED-NEXT: store x86_fp80 [[TMP6]], ptr [[TMP4]], align 16 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 456cb9f832f2..5ef3885cc8c6 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -241,15 +241,14 @@ define i32 @interleaved_access_forward(ptr %p, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX1]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX1]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1 @@ -388,15 +387,14 @@ define i32 @interleaved_access_reverse(ptr %p, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP18]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[OFFSET_IDX]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 1 @@ -541,11 +539,10 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0 @@ -731,11 +728,10 @@ define void @irregular_type(ptr %a, i64 %n) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]] @@ -775,11 +771,10 @@ define void @irregular_type(ptr %a, i64 %n) { ; INTER-NEXT: br label %[[VECTOR_BODY:.*]] ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; INTER-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; INTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1 ; INTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2 ; INTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3 -; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]] +; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]] ; INTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]] ; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]] ; INTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]] @@ -944,11 +939,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] @@ -1059,11 +1053,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) { ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; INTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; INTER-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]] ; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] ; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] @@ -1178,11 +1171,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] @@ -1227,11 +1219,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) { ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16 -; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0 ; INTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16 ; INTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32 ; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48 -; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] +; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]] ; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] ; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] @@ -1466,11 +1457,10 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] @@ -1495,7 +1485,7 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i8> [[TMP27]], i8 [[TMP24]], i32 2 ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP25]], i32 3 ; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i8> [[TMP20]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX1]] ; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP30]], align 1, !alias.scope [[META30:![0-9]+]], !noalias [[META27]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index 3c73eafa1b22..1b5cb2dc88b0 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -185,11 +185,10 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: store i64 [[TMP49]], ptr [[TMP48]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] ; CHECK: [[PRED_STORE_CONTINUE28]]: -; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 0 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 1 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 2 ; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 3 -; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP50]], i32 0 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i32 0 ; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x ptr> [[TMP54]], ptr [[TMP51]], i32 1 ; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x ptr> [[TMP55]], ptr [[TMP52]], i32 2 ; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x ptr> [[TMP56]], ptr [[TMP53]], i32 3 @@ -203,7 +202,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x ptr> [[TMP64]], ptr [[TMP61]], i32 3 ; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]] ; CHECK: [[PRED_LOAD_IF29]]: -; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP50]], align 1 +; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[B]], align 1 ; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i8> poison, i8 [[TMP66]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE30]] ; CHECK: [[PRED_LOAD_CONTINUE30]]: @@ -270,7 +269,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]] ; CHECK: [[PRED_STORE_IF45]]: ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i8> [[TMP100]], i32 0 -; CHECK-NEXT: store i8 [[TMP102]], ptr [[TMP50]], align 1 +; CHECK-NEXT: store i8 [[TMP102]], ptr [[B]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]] ; CHECK: [[PRED_STORE_CONTINUE46]]: ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll index 03e0853d2907..4b364133dfef 100644 --- a/llvm/test/Transforms/LoopVectorize/debugloc.ll +++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll @@ -173,11 +173,10 @@ define void @test_scalar_steps(ptr nocapture %a, ptr noalias %b, i64 %size) !dbg ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0, !dbg [[LOC8:!.+]] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8:!.+]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 083c8aec4752..35ef01379c75 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -454,11 +454,10 @@ define i64 @select_argmin_iv_not_canonical(i64 %num, ptr %src) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 1a99c47aa351..0607ec38e7e4 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -325,11 +325,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 48 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] ; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] ; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]] @@ -378,11 +377,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP43:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX13:%.*]] = mul i64 [[INDEX11]], 16 -; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX13]], 0 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX13]], 16 ; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX13]], 32 ; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX13]], 48 -; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]] +; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX13]] ; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]] ; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP28]] ; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP29]] diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll index dd04ba50dd26..7df89f725197 100644 --- a/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll @@ -169,7 +169,6 @@ define i64 @findlast_iv_step2_interleave(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP39:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8) ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -177,7 +176,7 @@ define i64 @findlast_iv_step2_interleave(ptr %a, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 10 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 14 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll index 0bbc96509876..bccea68476c5 100644 --- a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll +++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll @@ -312,11 +312,10 @@ define i64 @findlast_non_canonical_iv_with_expr(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 10, [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index 15622f9eb961..2c690f072211 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -15,7 +15,6 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3 @@ -23,7 +22,7 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll index 1899b1ece088..10e4eaf63cfd 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll @@ -20,13 +20,14 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1 ; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC1: [[PRED_LOAD_IF]]: -; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 ; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -45,7 +46,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 ; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] @@ -96,16 +97,19 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP69:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; VF2IC2-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> [[TMP69]], i64 [[TMP7]], i32 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC2: [[PRED_LOAD_IF]]: -; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0 ; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -143,7 +147,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0 ; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 ; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] @@ -313,13 +317,14 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1 ; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC1: [[PRED_LOAD_IF]]: -; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 ; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -338,7 +343,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 ; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] @@ -385,16 +390,19 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP65:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; VF2IC2-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> [[TMP65]], i64 [[TMP7]], i32 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC2: [[PRED_LOAD_IF]]: -; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0 ; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -432,7 +440,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0 ; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 ; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] @@ -595,13 +603,14 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 +; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1 ; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC1: [[PRED_LOAD_IF]]: -; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 ; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -620,7 +629,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC1: [[PRED_STORE_IF]]: -; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] +; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0 ; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 ; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] @@ -673,16 +682,19 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) -; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; VF2IC2-NEXT: [[TMP74:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 +; VF2IC2-NEXT: [[TMP75:%.*]] = insertelement <2 x i64> [[TMP74]], i64 [[TMP7]], i32 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; VF2IC2: [[PRED_LOAD_IF]]: -; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0 ; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -720,7 +732,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VF2IC2: [[PRED_STORE_IF]]: -; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]] +; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0 ; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0 ; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 05a6eabed443..3b1967055178 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1041,8 +1041,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 @@ -1132,8 +1130,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2 ; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 @@ -1423,7 +1419,6 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600 @@ -1431,7 +1426,7 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1000 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1200 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1400 -; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -1595,11 +1590,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) { ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200 -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600 -; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] ; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]] ; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]] ; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]] @@ -1898,7 +1892,6 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -1906,8 +1899,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 @@ -1942,7 +1935,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]] -; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4 @@ -2046,12 +2039,11 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] -; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1 ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 @@ -2068,7 +2060,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP20]], [[TMP19]] -; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; SINK-AFTER-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP22]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -2861,20 +2853,27 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE29]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3 +; UNROLL-NO-IC-NEXT: [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP3]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP4]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP5]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7 +; UNROLL-NO-IC-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP7]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP82:%.*]] = insertelement <4 x i32> [[TMP81]], i32 [[TMP8]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP83:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP9]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.udiv.if: -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]] +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0 ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-IC: pred.udiv.continue: @@ -2944,7 +2943,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC: pred.store.if: ; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]] -; UNROLL-NO-IC-NEXT: store i32 [[TMP2]], ptr [[TMP50]], align 4 +; UNROLL-NO-IC-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4 ; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NO-IC: pred.store.continue: ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1 @@ -3096,15 +3095,18 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE12]] ] ; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE12]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 ; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2 ; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3 +; SINK-AFTER-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0 +; SINK-AFTER-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP3]], i32 1 +; SINK-AFTER-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP4]], i32 2 +; SINK-AFTER-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP5]], i32 3 ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 ; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; SINK-AFTER: pred.udiv.if: -; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]] +; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[OFFSET_IDX]] ; SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0 ; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]] ; SINK-AFTER: pred.udiv.continue: @@ -3140,7 +3142,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER: pred.store.if: ; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0 ; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]] -; SINK-AFTER-NEXT: store i32 [[TMP2]], ptr [[TMP27]], align 4 +; SINK-AFTER-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4 ; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]] ; SINK-AFTER: pred.store.continue: ; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index f56699a45320..0b0f51b2dc68 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1725,6 +1725,712 @@ exit: ret i32 0 } +define void @fp_iv_used_in_gep_fadd(float %init, ptr noalias nocapture %A, float %fpinc, i32 %N) { +; VEC4_INTERL1-LABEL: @fp_iv_used_in_gep_fadd( +; VEC4_INTERL1-NEXT: entry: +; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC4_INTERL1: vector.ph: +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]] +; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT2]], +; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[TMP5]] +; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC4_INTERL1: vector.body: +; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]] +; VEC4_INTERL1-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP7]] +; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP9]] +; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], 3.000000e+00 +; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP11]] +; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32> +; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i64 0 +; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i64 1 +; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP13]], i64 2 +; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP13]], i64 3 +; VEC4_INTERL1-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64 +; VEC4_INTERL1-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP18]] +; VEC4_INTERL1-NEXT: [[TMP20:%.*]] = sext i32 [[TMP15]] to i64 +; VEC4_INTERL1-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]] +; VEC4_INTERL1-NEXT: [[TMP22:%.*]] = sext i32 [[TMP16]] to i64 +; VEC4_INTERL1-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]] +; VEC4_INTERL1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP17]] to i64 +; VEC4_INTERL1-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP24]] +; VEC4_INTERL1-NEXT: store float [[OFFSET_IDX]], ptr [[TMP19]], align 4 +; VEC4_INTERL1-NEXT: store float [[TMP8]], ptr [[TMP21]], align 4 +; VEC4_INTERL1-NEXT: store float [[TMP10]], ptr [[TMP23]], align 4 +; VEC4_INTERL1-NEXT: store float [[TMP12]], ptr [[TMP25]], align 4 +; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] +; VEC4_INTERL1-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC4_INTERL1-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VEC4_INTERL1: middle.block: +; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; VEC4_INTERL1: scalar.ph: +; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL7:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] +; VEC4_INTERL1: for.body: +; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL7]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC4_INTERL1-NEXT: [[TMP27:%.*]] = sext i32 [[C]] to i64 +; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]] +; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC4_INTERL1-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]] +; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; VEC4_INTERL1: exit: +; VEC4_INTERL1-NEXT: ret void +; +; VEC4_INTERL2-LABEL: @fp_iv_used_in_gep_fadd( +; VEC4_INTERL2-NEXT: entry: +; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC4_INTERL2: vector.ph: +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC:%.*]], i64 0 +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] +; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] +; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], splat (float 4.000000e+00) +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], +; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT2]], [[TMP6]] +; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC4_INTERL2: vector.body: +; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], [[TMP5]] +; VEC4_INTERL2-NEXT: [[DOTCAST3:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST3]] +; VEC4_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP7]] +; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP9]] +; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], 3.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP11]] +; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP13]] +; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fmul fast float [[FPINC]], 5.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP15]] +; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fmul fast float [[FPINC]], 6.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP17]] +; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast float [[FPINC]], 7.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP19]] +; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32> +; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i64 1 +; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i64 2 +; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i64 3 +; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fptoui <4 x float> [[STEP_ADD]] to <4 x i32> +; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP26]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i64 1 +; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP26]], i64 2 +; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i64 3 +; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = sext i32 [[TMP22]] to i64 +; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP31]] +; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = sext i32 [[TMP23]] to i64 +; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]] +; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = sext i32 [[TMP24]] to i64 +; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]] +; VEC4_INTERL2-NEXT: [[TMP37:%.*]] = sext i32 [[TMP25]] to i64 +; VEC4_INTERL2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP37]] +; VEC4_INTERL2-NEXT: [[TMP39:%.*]] = sext i32 [[TMP27]] to i64 +; VEC4_INTERL2-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP39]] +; VEC4_INTERL2-NEXT: [[TMP41:%.*]] = sext i32 [[TMP28]] to i64 +; VEC4_INTERL2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]] +; VEC4_INTERL2-NEXT: [[TMP43:%.*]] = sext i32 [[TMP29]] to i64 +; VEC4_INTERL2-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]] +; VEC4_INTERL2-NEXT: [[TMP45:%.*]] = sext i32 [[TMP30]] to i64 +; VEC4_INTERL2-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP45]] +; VEC4_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP32]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP8]], ptr [[TMP34]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP10]], ptr [[TMP36]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP12]], ptr [[TMP38]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP14]], ptr [[TMP40]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP16]], ptr [[TMP42]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP18]], ptr [[TMP44]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP20]], ptr [[TMP46]], align 4 +; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[STEP_ADD]], [[TMP5]] +; VEC4_INTERL2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC4_INTERL2-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VEC4_INTERL2: middle.block: +; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; VEC4_INTERL2: scalar.ph: +; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] +; VEC4_INTERL2: for.body: +; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC4_INTERL2-NEXT: [[TMP48:%.*]] = sext i32 [[C]] to i64 +; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]] +; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC4_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]] +; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; VEC4_INTERL2: exit: +; VEC4_INTERL2-NEXT: ret void +; +; VEC1_INTERL2-LABEL: @fp_iv_used_in_gep_fadd( +; VEC1_INTERL2-NEXT: entry: +; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 +; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC1_INTERL2: vector.ph: +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]] +; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] +; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC1_INTERL2: vector.body: +; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]] +; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP5]] +; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fptoui float [[OFFSET_IDX]] to i32 +; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fptoui float [[TMP6]] to i32 +; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = sext i32 [[TMP7]] to i64 +; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]] +; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = sext i32 [[TMP8]] to i64 +; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP10]], align 4 +; VEC1_INTERL2-NEXT: store float [[TMP6]], ptr [[TMP12]], align 4 +; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC1_INTERL2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VEC1_INTERL2: middle.block: +; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; VEC1_INTERL2: scalar.ph: +; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] +; VEC1_INTERL2: for.body: +; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = sext i32 [[C]] to i64 +; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] +; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC1_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]] +; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; VEC1_INTERL2: exit: +; VEC1_INTERL2-NEXT: ret void +; +; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_used_in_gep_fadd( +; VEC2_INTERL1_PRED_STORE-NEXT: entry: +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT2]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT]], [[TMP5]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP7]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fptoui <2 x float> [[VEC_IND]] to <2 x i32> +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP9]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP9]], i64 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = sext i32 [[TMP10]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP12]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = sext i32 [[TMP11]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[OFFSET_IDX]], ptr [[TMP13]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP8]], ptr [[TMP15]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[FOR_BODY]] +; VEC2_INTERL1_PRED_STORE: for.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP17:%.*]] = sext i32 [[C]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP17]] +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: exit: +; VEC2_INTERL1_PRED_STORE-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %x.05 = phi float [ %init, %entry ], [ %add, %for.body ] + %c = fptoui float %x.05 to i32 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %c + store float %x.05, ptr %arrayidx, align 4 + %add = fadd fast float %x.05, %fpinc + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} + +define void @fp_iv_used_in_gep_fsub(float %init, ptr noalias nocapture %A, float %fpinc, i32 %N) { +; VEC4_INTERL1-LABEL: @fp_iv_used_in_gep_fsub( +; VEC4_INTERL1-NEXT: entry: +; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 +; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC4_INTERL1: vector.ph: +; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]] +; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT2]], +; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[TMP5]] +; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 +; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC4_INTERL1: vector.body: +; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]] +; VEC4_INTERL1-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP7]] +; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], -2.000000e+00 +; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP9]] +; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], -3.000000e+00 +; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP11]] +; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32> +; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i64 0 +; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i64 1 +; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP13]], i64 2 +; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP13]], i64 3 +; VEC4_INTERL1-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64 +; VEC4_INTERL1-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP18]] +; VEC4_INTERL1-NEXT: [[TMP20:%.*]] = sext i32 [[TMP15]] to i64 +; VEC4_INTERL1-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]] +; VEC4_INTERL1-NEXT: [[TMP22:%.*]] = sext i32 [[TMP16]] to i64 +; VEC4_INTERL1-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]] +; VEC4_INTERL1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP17]] to i64 +; VEC4_INTERL1-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP24]] +; VEC4_INTERL1-NEXT: store float [[OFFSET_IDX]], ptr [[TMP19]], align 4 +; VEC4_INTERL1-NEXT: store float [[TMP8]], ptr [[TMP21]], align 4 +; VEC4_INTERL1-NEXT: store float [[TMP10]], ptr [[TMP23]], align 4 +; VEC4_INTERL1-NEXT: store float [[TMP12]], ptr [[TMP25]], align 4 +; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] +; VEC4_INTERL1-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC4_INTERL1-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VEC4_INTERL1: middle.block: +; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; VEC4_INTERL1: scalar.ph: +; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL7:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] +; VEC4_INTERL1: for.body: +; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL7]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC4_INTERL1-NEXT: [[TMP27:%.*]] = sext i32 [[C]] to i64 +; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]] +; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC4_INTERL1-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] +; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; VEC4_INTERL1: exit: +; VEC4_INTERL1-NEXT: ret void +; +; VEC4_INTERL2-LABEL: @fp_iv_used_in_gep_fsub( +; VEC4_INTERL2-NEXT: entry: +; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 +; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC4_INTERL2: vector.ph: +; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC:%.*]], i64 0 +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] +; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] +; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], splat (float 4.000000e+00) +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 +; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer +; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], +; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT2]], [[TMP6]] +; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC4_INTERL2: vector.body: +; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[TMP5]] +; VEC4_INTERL2-NEXT: [[DOTCAST3:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST3]] +; VEC4_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP7]] +; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], -2.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP9]] +; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], -3.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP11]] +; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP13]] +; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fmul fast float [[FPINC]], 3.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP15]] +; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP17]] +; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]] +; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32> +; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i64 1 +; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i64 2 +; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i64 3 +; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fptoui <4 x float> [[STEP_ADD]] to <4 x i32> +; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP26]], i64 0 +; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i64 1 +; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP26]], i64 2 +; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i64 3 +; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = sext i32 [[TMP22]] to i64 +; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP31]] +; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = sext i32 [[TMP23]] to i64 +; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]] +; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = sext i32 [[TMP24]] to i64 +; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]] +; VEC4_INTERL2-NEXT: [[TMP37:%.*]] = sext i32 [[TMP25]] to i64 +; VEC4_INTERL2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP37]] +; VEC4_INTERL2-NEXT: [[TMP39:%.*]] = sext i32 [[TMP27]] to i64 +; VEC4_INTERL2-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP39]] +; VEC4_INTERL2-NEXT: [[TMP41:%.*]] = sext i32 [[TMP28]] to i64 +; VEC4_INTERL2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]] +; VEC4_INTERL2-NEXT: [[TMP43:%.*]] = sext i32 [[TMP29]] to i64 +; VEC4_INTERL2-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]] +; VEC4_INTERL2-NEXT: [[TMP45:%.*]] = sext i32 [[TMP30]] to i64 +; VEC4_INTERL2-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP45]] +; VEC4_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP32]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP8]], ptr [[TMP34]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP10]], ptr [[TMP36]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP12]], ptr [[TMP38]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP14]], ptr [[TMP40]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP16]], ptr [[TMP42]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP18]], ptr [[TMP44]], align 4 +; VEC4_INTERL2-NEXT: store float [[TMP20]], ptr [[TMP46]], align 4 +; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[TMP5]] +; VEC4_INTERL2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC4_INTERL2-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VEC4_INTERL2: middle.block: +; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; VEC4_INTERL2: scalar.ph: +; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] +; VEC4_INTERL2: for.body: +; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC4_INTERL2-NEXT: [[TMP48:%.*]] = sext i32 [[C]] to i64 +; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]] +; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC4_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] +; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; VEC4_INTERL2: exit: +; VEC4_INTERL2-NEXT: ret void +; +; VEC1_INTERL2-LABEL: @fp_iv_used_in_gep_fsub( +; VEC1_INTERL2-NEXT: entry: +; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 +; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VEC1_INTERL2: vector.ph: +; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]] +; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] +; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC1_INTERL2: vector.body: +; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]] +; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]] +; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]] +; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fptoui float [[OFFSET_IDX]] to i32 +; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fptoui float [[TMP6]] to i32 +; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = sext i32 [[TMP7]] to i64 +; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]] +; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = sext i32 [[TMP8]] to i64 +; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] +; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP10]], align 4 +; VEC1_INTERL2-NEXT: store float [[TMP6]], ptr [[TMP12]], align 4 +; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC1_INTERL2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VEC1_INTERL2: middle.block: +; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; VEC1_INTERL2: scalar.ph: +; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] +; VEC1_INTERL2: for.body: +; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = sext i32 [[C]] to i64 +; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] +; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC1_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] +; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; VEC1_INTERL2: exit: +; VEC1_INTERL2-NEXT: ret void +; +; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_used_in_gep_fsub( +; VEC2_INTERL1_PRED_STORE-NEXT: entry: +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.ph: +; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT2]], +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[BROADCAST_SPLAT]], [[TMP5]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer +; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: vector.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP7]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fptoui <2 x float> [[VEC_IND]] to <2 x i32> +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP9]], i64 0 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP9]], i64 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = sext i32 [[TMP10]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP12]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = sext i32 [[TMP11]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]] +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[OFFSET_IDX]], ptr [[TMP13]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP8]], ptr [[TMP15]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: middle.block: +; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[FOR_BODY]] +; VEC2_INTERL1_PRED_STORE: for.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP17:%.*]] = sext i32 [[C]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP17]] +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; VEC2_INTERL1_PRED_STORE: exit: +; VEC2_INTERL1_PRED_STORE-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %x.05 = phi float [ %init, %entry ], [ %add, %for.body ] + %c = fptoui float %x.05 to i32 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %c + store float %x.05, ptr %arrayidx, align 4 + %add = fsub fast float %x.05, %fpinc + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} + +define void @fp_iv_used_in_gep_fmul(float %init, ptr noalias nocapture %A, float %fpinc, i32 %N) { +; VEC4_INTERL1-LABEL: @fp_iv_used_in_gep_fmul( +; VEC4_INTERL1-NEXT: entry: +; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] +; VEC4_INTERL1: for.body: +; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL1-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64 +; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] +; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC4_INTERL1-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]] +; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]] +; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; VEC4_INTERL1: exit: +; VEC4_INTERL1-NEXT: ret void +; +; VEC4_INTERL2-LABEL: @fp_iv_used_in_gep_fmul( +; VEC4_INTERL2-NEXT: entry: +; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] +; VEC4_INTERL2: for.body: +; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC4_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64 +; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] +; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC4_INTERL2-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]] +; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]] +; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; VEC4_INTERL2: exit: +; VEC4_INTERL2-NEXT: ret void +; +; VEC1_INTERL2-LABEL: @fp_iv_used_in_gep_fmul( +; VEC1_INTERL2-NEXT: entry: +; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] +; VEC1_INTERL2: for.body: +; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC1_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64 +; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] +; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC1_INTERL2-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]] +; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]] +; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; VEC1_INTERL2: exit: +; VEC1_INTERL2-NEXT: ret void +; +; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_used_in_gep_fmul( +; VEC2_INTERL1_PRED_STORE-NEXT: entry: +; VEC2_INTERL1_PRED_STORE-NEXT: br label [[FOR_BODY:%.*]] +; VEC2_INTERL1_PRED_STORE: for.body: +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; VEC2_INTERL1_PRED_STORE-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32 +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64 +; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]] +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]] +; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]] +; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]] +; VEC2_INTERL1_PRED_STORE: exit: +; VEC2_INTERL1_PRED_STORE-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %x.05 = phi float [ %init, %entry ], [ %add, %for.body ] + %c = fptoui float %x.05 to i32 + %arrayidx = getelementptr inbounds float, ptr %A, i32 %c + store float %x.05, ptr %arrayidx, align 4 + %add = fmul fast float %x.05, %fpinc + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %N + br i1 %exitcond, label %exit, label %for.body + +exit: + ret void +} !llvm.module.flags = !{!3} !0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index e2deaaf9ae19..7f05452b9474 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -22,12 +22,11 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]] @@ -35,7 +34,7 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5) ; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]] ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0 @@ -109,16 +108,17 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: store i32 99, ptr [[TMP10]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 @@ -139,7 +139,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] ; CHECK: [[PRED_LOAD_IF12]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] @@ -155,7 +155,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[PRED_LOAD_CONTINUE15]]: ; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ] ; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]] ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0 @@ -239,15 +239,16 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]] ; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -258,7 +259,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_CONTINUE20]]: -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META30:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]] @@ -266,7 +267,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) ; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]] ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0 @@ -342,15 +343,16 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META35:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META38:![0-9]+]], !noalias [[META40:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: @@ -361,14 +363,14 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4, !alias.scope [[META38]], !noalias [[META40]] ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] ; CHECK: [[PRED_LOAD_CONTINUE11]]: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] ; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP22]], i32 [[TMP25]], i32 1 ; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP32]], splat (i32 10) -; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP28:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP16]], <2 x i32> [[TMP32]] ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP28]], i32 0 @@ -455,15 +457,16 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]] ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -474,14 +477,14 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_CONTINUE20]]: -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META53:![0-9]+]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]] ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0 @@ -557,15 +560,16 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE7:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -580,7 +584,7 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] ; CHECK: [[PRED_LOAD_CONTINUE7]]: ; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP22]], <2 x i32> splat (i32 10) ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 @@ -651,19 +655,20 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META68:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store i32 20, ptr [[TMP6]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META71]], !noalias [[META73]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[INDEX]] ; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -680,7 +685,7 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] ; CHECK: [[PRED_STORE_IF12]]: -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store i32 10, ptr [[TMP14]], align 4, !alias.scope [[META71]], !noalias [[META73]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] ; CHECK: [[PRED_STORE_CONTINUE13]]: @@ -747,9 +752,8 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] ; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16 -; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]] ; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152 ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 152 @@ -763,7 +767,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00) -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1 @@ -846,9 +850,10 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP1]], i32 1 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP3]], i64 152 @@ -866,7 +871,7 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0 ; CHECK-NEXT: store double [[TMP19]], ptr [[TMP20]], align 8, !alias.scope [[META88:![0-9]+]], !noalias [[META85]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -882,7 +887,7 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] ; CHECK: [[PRED_STORE_IF4]]: -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store double [[TMP13]], ptr [[TMP43]], align 8, !alias.scope [[META88]], !noalias [[META85]] ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8 ; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP44]], align 8, !alias.scope [[META88]], !noalias [[META85]] @@ -1031,15 +1036,16 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META95:![0-9]+]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: @@ -1054,7 +1060,7 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] ; CHECK: [[PRED_STORE_IF4]]: -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] ; CHECK: [[PRED_STORE_CONTINUE5]]: @@ -1069,7 +1075,7 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] ; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META95]], !noalias [[META92]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] ; CHECK: [[PRED_STORE_CONTINUE9]]: @@ -1154,13 +1160,14 @@ define void @stores_never_sunk_past_alising_loads_or_stores(ptr %dst, ptr %src, ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x i32> [[TMP40]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META99:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1 @@ -1200,7 +1207,7 @@ define void @stores_never_sunk_past_alising_loads_or_stores(ptr %dst, ptr %src, ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]] ; CHECK: [[PRED_LOAD_IF14]]: -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope [[META102]], !noalias [[META104]] ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 ; CHECK-NEXT: store i32 99, ptr [[TMP23]], align 4, !alias.scope [[META102]], !noalias [[META104]] @@ -1221,7 +1228,7 @@ define void @stores_never_sunk_past_alising_loads_or_stores(ptr %dst, ptr %src, ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] ; CHECK: [[PRED_STORE_IF18]]: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP32]], i32 0 ; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 4, !alias.scope [[META106:![0-9]+]], !noalias [[META99]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]] @@ -1302,16 +1309,17 @@ define void @loads_never_hoisted_past_alising_stores(ptr %dst, ptr %src, ptr %co ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META109:![0-9]+]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META112:![0-9]+]], !noalias [[META114:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 ; CHECK-NEXT: store i32 100, ptr [[TMP6]], align 4, !alias.scope [[META112]], !noalias [[META114]] @@ -1332,7 +1340,7 @@ define void @loads_never_hoisted_past_alising_stores(ptr %dst, ptr %src, ptr %co ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: store i32 99, ptr [[TMP17]], align 4, !alias.scope [[META112]], !noalias [[META114]] ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META112]], !noalias [[META114]] ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 @@ -1350,7 +1358,7 @@ define void @loads_never_hoisted_past_alising_stores(ptr %dst, ptr %src, ptr %co ; CHECK: [[PRED_LOAD_CONTINUE13]]: ; CHECK-NEXT: [[TMP25:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP24]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10) -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] ; CHECK-NEXT: [[TMP29:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP26]], <2 x i32> [[TMP15]] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP29]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index 9de271e47d80..5ecd7e5feb7a 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -22,11 +22,10 @@ define void @test(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]] @@ -35,7 +34,7 @@ define void @test(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -100,16 +99,17 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -128,7 +128,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]] ; CHECK: [[PRED_LOAD_IF10]]: -; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP49]], align 4 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]] @@ -144,7 +144,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK: [[PRED_LOAD_CONTINUE13]]: ; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP27]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP33]], <2 x i32> [[TMP23]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -213,14 +213,13 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE17:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META14:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META17:![0-9]+]] ; CHECK-NEXT: [[TMP37:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) @@ -262,7 +261,7 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2) ; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ [[TMP36]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP31]], %[[PRED_LOAD_IF16]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP19]], <2 x i32> [[TMP28]], <2 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI18:%.*]] = select <2 x i1> [[TMP37]], <2 x i32> [[TMP32]], <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI18]], ptr [[TMP41]], align 4, !alias.scope [[META21:![0-9]+]], !noalias [[META23:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -332,13 +331,12 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META26:![0-9]+]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true) @@ -377,7 +375,7 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[TMP30:%.*]] = zext <2 x i8> [[TMP29]] to <2 x i32> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP30]], <2 x i32> [[TMP21]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META33:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -441,11 +439,10 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39:![0-9]+]] @@ -454,7 +451,7 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP25]], <2 x i32> [[TMP26]] -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP35]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -581,12 +578,11 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]] @@ -594,7 +590,7 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -660,12 +656,11 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]] ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]] @@ -673,7 +668,7 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) { ; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 @@ -784,16 +779,17 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10:.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP43]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP64]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -815,7 +811,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP32]], i32 0 ; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] ; CHECK: [[PRED_LOAD_IF3]]: -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] @@ -835,7 +831,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0 ; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]] ; CHECK: [[PRED_LOAD_IF7]]: -; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP61]], align 4 ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]] @@ -852,7 +848,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s ; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP44]], %[[PRED_LOAD_IF9]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP32]], <2 x i32> [[TMP22]], <2 x i32> [[TMP15]] ; CHECK-NEXT: [[TMP42:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP45]], <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 32 ; CHECK-NEXT: store <2 x i32> [[TMP42]], ptr [[TMP40]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -924,7 +920,6 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8 @@ -984,7 +979,6 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8 @@ -1042,13 +1036,12 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP12:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 20) @@ -1108,7 +1101,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali ; CHECK-NEXT: [[TMP42:%.*]] = phi <2 x i32> [ [[TMP38]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP41]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]] ; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]] -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 8c0719b3b470..9a4f5ecd93a3 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -833,11 +833,10 @@ define void @sinkable_predicated_store(ptr %A, ptr %B) { ; VEC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; VEC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP1]] -; VEC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]] +; VEC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8:![0-9]+]] ; VEC-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], zeroinitializer ; VEC-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1) diff --git a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll index ccf05d73945f..d0c3eaf0f716 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-multiple-uses-in-same-instruction.ll @@ -11,14 +11,12 @@ define void @multiple_iv_uses_in_same_instruction(ptr %ptr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[TMP0]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[INDEX]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[TMP1]], i64 [[TMP1]] -; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 diff --git a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll index 8f773e2f0edd..a2e60c46ebf5 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll @@ -22,9 +22,8 @@ define void @int_iv_based_on_pointer_iv(ptr %A) { ; VF2: vector.body: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; VF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4 -; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]] +; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]] ; VF2-NEXT: store i8 0, ptr [[TMP9]], align 1 ; VF2-NEXT: store i8 0, ptr [[TMP10]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index f384a66a32e0..62f2f7f65240 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -863,15 +863,14 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4 @@ -1065,11 +1064,10 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] @@ -1081,7 +1079,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]] @@ -1237,9 +1235,8 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 8 @@ -1391,11 +1388,10 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1 @@ -1564,7 +1560,6 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 @@ -1573,7 +1568,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP26]], align 1, !alias.scope [[META17:![0-9]+]] ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1, !alias.scope [[META17]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1 ; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]] ; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]] @@ -1774,7 +1769,6 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 3 @@ -1792,7 +1786,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 1, !alias.scope [[META17]] ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP22]], align 1, !alias.scope [[META17]] ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP38]], align 1, !alias.scope [[META17]] -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP12]], i32 1 @@ -2428,13 +2422,12 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]] ; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1 ; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2 ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2 @@ -2590,7 +2583,6 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 @@ -2602,7 +2594,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16> ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP5]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP6]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll index 9bbd67059e84..74d912352f51 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-metadata.ll @@ -20,17 +20,16 @@ define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP3]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], splat (double 2.000000e+00) -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP11]], align 8, !tbaa [[TBAA5]] diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 3c2dbacc656a..03ffb6fbc584 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1131,9 +1131,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2 -; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] +; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2 ; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll index b14a1cdff92c..0f66692aacf0 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-neg-off.ll @@ -21,9 +21,10 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 -1000, [[DOTCAST]] -; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = load i1, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = load i1, ptr [[TMP3]], align 1 @@ -31,7 +32,7 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i1> [[TMP6]], i1 [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 4db3d1eed477..b46cd8ecea5b 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -1299,9 +1299,8 @@ define i16 @multiple_exit_none_via_latch(ptr %dst, i64 %x) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8 ; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll index 9339aed92796..0fa47f71851e 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-with-constant-exit-condition.ll @@ -17,9 +17,8 @@ define i16 @multiple_exit_one_with_constant_condition(ptr %dst, i64 %x) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8 ; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 6e7852ec347d..62aa879e08f0 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -14,9 +14,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr ; VF4IC1: [[VECTOR_BODY]]: ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 -; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1 -; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2 ; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3 ; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]] ; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1 @@ -41,7 +38,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2 ; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3 ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]] ; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1 @@ -86,7 +82,6 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF4IC1: [[VECTOR_BODY]]: ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 @@ -99,7 +94,7 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF4IC1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP7]] ; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]] ; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]] -; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4 +; VF4IC1-NEXT: store i32 [[INDEX]], ptr [[TMP6]], align 4 ; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 ; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 ; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4 @@ -120,11 +115,10 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts( ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VF2IC2-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3 -; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP7]], 1 +; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1 ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]] ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll index ce0736486de2..18bd16873c1d 100644 --- a/llvm/test/Transforms/LoopVectorize/operand-bundles.ll +++ b/llvm/test/Transforms/LoopVectorize/operand-bundles.ll @@ -142,18 +142,17 @@ define void @assume_loop_variant_operand_bundle(ptr noalias %a, ptr noalias %b) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 -; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP0]]) ] +; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[INDEX]]) ] ; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP1]]) ] ; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP2]]) ] ; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP3]]) ] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP10]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600 diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 9114dd86e6d4..143da205f18b 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -793,7 +793,6 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 6 @@ -802,7 +801,7 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] @@ -829,7 +828,6 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX1]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 6 @@ -838,7 +836,7 @@ define void @multiple_ivs_wide(ptr %dst) { ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2 ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP17]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll index 94392f856c38..515ef2b9b863 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll @@ -15,22 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16 ; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index d5088fe60ee9..736d8356d221 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -25,11 +25,10 @@ define void @a(ptr readnone %b) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] @@ -535,15 +534,13 @@ define i64 @ivopt_widen_ptr_indvar_2(ptr noalias %a, i64 %stride, i64 %n) { ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]] -; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 0, [[TMP1]] -; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] ; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]] ; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] ; STRIDED-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP1]] ; STRIDED-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], [[TMP12]] ; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP1]] ; STRIDED-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], [[TMP14]] -; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] +; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]] ; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]] ; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP15]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 678b9d047d9f..17370fcdd576 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -18,9 +18,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; IC1: [[VECTOR_BODY]]: ; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE13:.*]] ] -; IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 -; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] +; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] ; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 ; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1 @@ -113,11 +112,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) { ; IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; IC2: [[VECTOR_BODY]]: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] -; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 -; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] +; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]] ; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 ; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index d3a8927d52aa..9266ffd5e4bd 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -2264,11 +2264,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 @@ -2284,7 +2287,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -2342,15 +2345,22 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP94:%.*]], %[[PRED_LOAD_CONTINUE15]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP98:%.*]], %[[PRED_LOAD_CONTINUE15]] ] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP1]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP2]], i32 2 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP3]], i32 3 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0 +; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP5]], i32 1 +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP6]], i32 2 +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP7]], i32 3 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1 ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1 @@ -2379,7 +2389,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]: -; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]] +; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 ; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll index fa55c0a17073..df50e6f12d36 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll @@ -329,11 +329,10 @@ define void @reduc_store_inside_unrolled(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]] @@ -535,11 +534,10 @@ define void @reduc_store_middle_store_predicated(ptr %dst, ptr readonly %src) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll index 132ab6ca67b2..c9bc762fd857 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-div.ll @@ -13,8 +13,9 @@ define void @test_sdiv_variant_divisor_induction(ptr noalias %a, ptr noalias %c) ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], @@ -22,7 +23,7 @@ define void @test_sdiv_variant_divisor_induction(ptr noalias %a, ptr noalias %c) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -42,7 +43,7 @@ define void @test_sdiv_variant_divisor_induction(ptr noalias %a, ptr noalias %c) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -93,16 +94,17 @@ define void @test_sdiv_variant_divisor_load(ptr noalias %a, ptr noalias %b, ptr ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i64> [[TMP30]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -127,7 +129,7 @@ define void @test_sdiv_variant_divisor_load(ptr noalias %a, ptr noalias %b, ptr ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP22]], i32 0 ; CHECK-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -182,13 +184,14 @@ define void @test_sdiv_invariant_divisor_nonconst(ptr noalias %a, i64 %b, ptr no ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -208,7 +211,7 @@ define void @test_sdiv_invariant_divisor_nonconst(ptr noalias %a, i64 %b, ptr no ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -259,13 +262,14 @@ define void @test_sdiv_invariant_divisor_minusone(ptr noalias %a, ptr noalias %c ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -285,7 +289,7 @@ define void @test_sdiv_invariant_divisor_minusone(ptr noalias %a, ptr noalias %c ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -336,13 +340,14 @@ define void @test_sdiv_invariant_divisor_safeimm(ptr noalias %a, ptr noalias %c) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -361,7 +366,7 @@ define void @test_sdiv_invariant_divisor_safeimm(ptr noalias %a, ptr noalias %c) ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] @@ -475,13 +480,14 @@ define void @test_sdiv_both_invariant_nonconst(ptr noalias %a, i64 %b, i64 %b2, ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -502,7 +508,7 @@ define void @test_sdiv_both_invariant_nonconst(ptr noalias %a, i64 %b, i64 %b2, ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 126846456bb1..f030d6c8dbbf 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -97,11 +97,10 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP7]] @@ -155,17 +154,16 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 0 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3 -; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[A]], i32 0 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x ptr> [[TMP36]], ptr [[TMP7]], i32 2 ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP37]], ptr [[TMP8]], i32 3 ; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: @@ -196,7 +194,7 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) { ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0 -; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index c64a333b990b..5f47c9ad1836 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -149,7 +149,6 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -163,7 +162,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -204,15 +203,14 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 @@ -253,7 +251,6 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -267,7 +264,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -309,7 +306,6 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -323,7 +319,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -364,7 +360,6 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -378,7 +373,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -418,15 +413,14 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 @@ -606,7 +600,6 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -620,7 +613,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -661,15 +654,14 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 @@ -710,7 +702,6 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -724,7 +715,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -766,7 +757,6 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -780,7 +770,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -822,7 +812,6 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -836,7 +825,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -877,15 +866,14 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 @@ -924,7 +912,6 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16 -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16> @@ -934,7 +921,7 @@ define void @test_step_is_not_invariant(ptr %A) { ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP8]] -; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP9]], align 2 +; CHECK-NEXT: store i16 [[TMP0]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2) diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index 63be2fd7bd8a..949275137b0b 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -147,9 +147,8 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8 @@ -158,7 +157,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP7]], splat (i64 42) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 8 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP12]], align 8 @@ -198,13 +197,12 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 @@ -244,9 +242,8 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8 @@ -255,7 +252,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP7]], splat (i64 42) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 8 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP12]], align 8 @@ -295,7 +292,6 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -309,7 +305,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -400,7 +396,6 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -414,7 +409,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -456,7 +451,6 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -470,7 +464,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -512,7 +506,6 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3) ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -526,7 +519,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index 1512cd4986e3..3e5c47fa0003 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -283,7 +283,6 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 @@ -291,7 +290,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP3]] @@ -348,7 +347,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP61]], i32 [[TMP54]], i32 5 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i32> [[TMP62]], i32 [[TMP55]], i32 6 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <8 x i32> [[TMP63]], i32 [[TMP56]], i32 7 -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <8 x i32> [[TMP64]], ptr [[TMP65]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index a5bb07f1fd4e..3176e1528ad7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -229,7 +229,6 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -243,7 +242,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -266,7 +265,6 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -292,7 +290,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 -; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -337,15 +335,14 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2: vector.body: ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1 +; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[OFFSET_IDX]], 1 ; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 -; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8 ; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 @@ -366,11 +363,10 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4: vector.body: ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 1 +; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[OFFSET_IDX]], 1 ; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; VF4-NEXT: [[TMP6:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) @@ -378,7 +374,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1 ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3 -; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -423,7 +419,6 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -437,7 +432,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -459,7 +454,6 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -485,7 +479,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 -; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -530,7 +524,6 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 @@ -544,7 +537,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42) ; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0 ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1 -; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 @@ -566,7 +559,6 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -592,7 +584,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1 ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2 ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3 -; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -725,15 +717,14 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 -; VF2-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1 +; VF2-NEXT: [[TMP3:%.*]] = lshr i64 [[OFFSET_IDX]], 1 ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] ; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42) ; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0 ; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 -; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; VF2-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8 @@ -754,11 +745,10 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 -; VF4-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP1]], 1 +; VF4-NEXT: [[TMP5:%.*]] = lshr i64 [[OFFSET_IDX]], 1 ; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]] ; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42) @@ -766,7 +756,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1 ; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 ; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 -; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -811,7 +801,6 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -825,7 +814,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -848,7 +837,6 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -874,7 +862,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 -; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -920,7 +908,6 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 @@ -934,7 +921,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42) ; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 -; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 @@ -957,7 +944,6 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -983,7 +969,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1 ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3 -; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 6ce34ad58c75..7737b6104e23 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -304,7 +304,6 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer @@ -320,7 +319,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 @@ -345,7 +344,6 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -373,7 +371,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -425,7 +423,6 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) @@ -441,7 +438,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 @@ -466,7 +463,6 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -494,7 +490,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -546,7 +542,6 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) @@ -562,7 +557,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 @@ -587,7 +582,6 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -615,7 +609,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -667,7 +661,6 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer @@ -683,7 +676,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 @@ -707,7 +700,6 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -735,7 +727,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -786,7 +778,6 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) @@ -802,7 +793,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 @@ -826,7 +817,6 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -854,7 +844,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -905,7 +895,6 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) @@ -921,7 +910,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42) ; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 -; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 @@ -945,7 +934,6 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -973,7 +961,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1 ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3 -; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]] +; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] @@ -1322,7 +1310,6 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer @@ -1338,7 +1325,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 @@ -1363,7 +1350,6 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -1391,7 +1377,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -1443,7 +1429,6 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) @@ -1459,7 +1444,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 @@ -1484,7 +1469,6 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -1512,7 +1496,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -1564,7 +1548,6 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) @@ -1580,7 +1563,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 @@ -1605,7 +1588,6 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6 @@ -1633,7 +1615,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -1685,7 +1667,6 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer ; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer @@ -1701,7 +1682,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 @@ -1726,7 +1707,6 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -1754,7 +1734,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -1806,7 +1786,6 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1) ; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1) @@ -1822,7 +1801,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 @@ -1847,7 +1826,6 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -1875,7 +1853,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] @@ -1927,7 +1905,6 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3) ; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3) @@ -1943,7 +1920,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42) ; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0 ; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1 -; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 ; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8 @@ -1968,7 +1945,6 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] ; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3 ; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]] -; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3 ; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 ; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9 @@ -1996,7 +1972,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1 ; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2 ; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3 -; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]] +; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]] ; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]] ; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll index 41cc313b418d..e1ca138eb028 100644 --- a/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll +++ b/llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll @@ -107,11 +107,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) { ; FORCED-TF-NEXT: br label [[VECTOR_BODY:%.*]] ; FORCED-TF: vector.body: ; FORCED-TF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ] -; FORCED-TF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; FORCED-TF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; FORCED-TF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; FORCED-TF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]] +; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[INDEX]] ; FORCED-TF-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]] ; FORCED-TF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]] ; FORCED-TF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]] @@ -179,11 +178,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]] ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll index 9f3744f4a515..76a8bba86984 100644 --- a/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll @@ -112,9 +112,10 @@ for.end: ; VF8: vector.body: ; VF8-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ] ; VF8-NEXT: [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step -; VF8-NEXT: [[MUL0:%.+]] = mul i64 0, %step -; VF8-NEXT: [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]] -; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD]] +; VF8-NEXT: [[MUL1:%.+]] = mul i64 1, %step +; VF8-NEXT: [[ADD1:%.+]] = add i64 [[OFFSET_IDX]], [[MUL1]] +; VF8: getelementptr inbounds i32, ptr %in, i64 [[OFFSET_IDX]] +; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD1]] ; VF8: middle.block: ; VF1-LABEL: @doit2 diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll index 8b9a52689904..c573ebaa51e9 100644 --- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll +++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll @@ -141,9 +141,8 @@ define void @stride_poison(ptr %dst) mustprogress { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]] ; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1 ; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index 6a6ae316f4e5..6cb76f86aeb5 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -157,11 +157,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]] @@ -235,11 +234,10 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1, ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX2]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX2]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX2]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX2]], 3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[OFFSET_IDX2]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP7]]