[VPlan] Replicate VPScalarIVStepsRecipe by VF outside replicate regions. (#170053)
Extend replicateByVF to also handle VPScalarIVStepsRecipe. To do so, the patch adds a new lane operand to VPScalarIVStepsRecipe, which is only added when replicating. This enables removing a number of lane 0 computations. The lane operand will also be used to explicitly replicate the replicate regions in a follow-up. Depends on https://github.com/llvm/llvm-project/pull/169796 Depends on https://github.com/llvm/llvm-project/pull/170906 PR: https://github.com/llvm/llvm-project/pull/170053
This commit is contained in:
@@ -4051,9 +4051,12 @@ public:
|
||||
~VPScalarIVStepsRecipe() override = default;
|
||||
|
||||
VPScalarIVStepsRecipe *clone() override {
|
||||
return new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
|
||||
getOperand(2), InductionOpcode,
|
||||
getFastMathFlags(), getDebugLoc());
|
||||
auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
|
||||
getOperand(2), InductionOpcode,
|
||||
getFastMathFlags(), getDebugLoc());
|
||||
if (VPValue *StartIndex = getStartIndex())
|
||||
NewR->setStartIndex(StartIndex);
|
||||
return NewR;
|
||||
}
|
||||
|
||||
VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
|
||||
@@ -4080,6 +4083,14 @@ public:
|
||||
return getNumOperands() == 4 ? getOperand(3) : nullptr;
|
||||
}
|
||||
|
||||
/// Set or add the StartIndex operand.
|
||||
void setStartIndex(VPValue *StartIndex) {
|
||||
if (getNumOperands() == 4)
|
||||
setOperand(3, StartIndex);
|
||||
else
|
||||
addOperand(StartIndex);
|
||||
}
|
||||
|
||||
/// Returns true if the recipe only uses the first lane of operand \p Op.
|
||||
bool usesFirstLaneOnly(const VPValue *Op) const override {
|
||||
assert(is_contained(operands(), Op) &&
|
||||
@@ -4087,6 +4098,8 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
|
||||
|
||||
protected:
|
||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||||
/// Print the recipe.
|
||||
|
||||
@@ -4919,18 +4919,18 @@ void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
|
||||
vp_depth_first_shallow(Plan.getEntry()));
|
||||
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
|
||||
vp_depth_first_shallow(LoopRegion->getEntry()));
|
||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and
|
||||
// VPInstructions, excluding ones in replicate regions. Those are not
|
||||
// materialized explicitly yet. Those vector users are still handled in
|
||||
// VPReplicateRegion::execute(), via shouldPack().
|
||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
|
||||
// VPScalarIVStepsRecipes and VPInstructions, excluding ones in replicate
|
||||
// regions. Those are not materialized explicitly yet. Those vector users are
|
||||
// still handled in VPReplicateRegion::execute(), via shouldPack().
|
||||
// TODO: materialize build vectors for replicating recipes in replicating
|
||||
// regions.
|
||||
for (VPBasicBlock *VPBB :
|
||||
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
if (!isa<VPReplicateRecipe, VPInstruction>(&R))
|
||||
if (!isa<VPScalarIVStepsRecipe, VPReplicateRecipe, VPInstruction>(&R))
|
||||
continue;
|
||||
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
|
||||
auto *DefR = cast<VPSingleDefRecipe>(&R);
|
||||
auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {
|
||||
VPRegionBlock *ParentRegion = cast<VPRecipeBase>(U)->getRegion();
|
||||
return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
|
||||
|
||||
@@ -53,9 +53,6 @@ class UnrollState {
|
||||
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
|
||||
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
|
||||
|
||||
/// Add a start index operand to \p Steps for \p Part.
|
||||
void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
|
||||
|
||||
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
|
||||
/// all parts.
|
||||
void unrollRecipeByUF(VPRecipeBase &R);
|
||||
@@ -126,8 +123,9 @@ public:
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
|
||||
unsigned Part) {
|
||||
static void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
|
||||
unsigned Part, VPlan &Plan,
|
||||
VPTypeAnalysis &TypeInfo) {
|
||||
if (Part == 0)
|
||||
return;
|
||||
|
||||
@@ -150,7 +148,7 @@ void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
|
||||
StartIndex = Builder.createScalarCast(Instruction::SIToFP, StartIndex,
|
||||
BaseIVTy, Steps->getDebugLoc());
|
||||
|
||||
Steps->addOperand(StartIndex);
|
||||
Steps->setStartIndex(StartIndex);
|
||||
}
|
||||
|
||||
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
|
||||
@@ -167,7 +165,7 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
|
||||
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
|
||||
remapOperands(&PartIR, Part);
|
||||
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
|
||||
addStartIndexForScalarSteps(Steps, Part);
|
||||
addStartIndexForScalarSteps(Steps, Part, Plan, TypeInfo);
|
||||
|
||||
addRecipeForPart(&Part0R, &PartIR, Part);
|
||||
}
|
||||
@@ -372,7 +370,7 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
|
||||
remapOperands(Copy, Part);
|
||||
|
||||
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
|
||||
addStartIndexForScalarSteps(ScalarIVSteps, Part);
|
||||
addStartIndexForScalarSteps(ScalarIVSteps, Part, Plan, TypeInfo);
|
||||
|
||||
// Add operand indicating the part to generate code for, to recipes still
|
||||
// requiring it.
|
||||
@@ -531,13 +529,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
|
||||
VPlanTransforms::removeDeadRecipes(Plan);
|
||||
}
|
||||
|
||||
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
|
||||
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
|
||||
/// definitions for operands of \DefR.
|
||||
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe,
|
||||
/// VPInstruction or VPScalarIVStepsRecipe) for lane \p Lane. Use \p
|
||||
/// Def2LaneDefs to look up scalar definitions for operands of \p DefR.
|
||||
static VPValue *
|
||||
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
||||
VPSingleDefRecipe *DefR, VPLane Lane,
|
||||
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
|
||||
assert((isa<VPInstruction, VPReplicateRecipe, VPScalarIVStepsRecipe>(DefR)) &&
|
||||
"DefR must be a VPReplicateRecipe, VPInstruction or "
|
||||
"VPScalarIVStepsRecipe");
|
||||
VPValue *Op;
|
||||
if (match(DefR, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)))) {
|
||||
auto LaneDefs = Def2LaneDefs.find(Op);
|
||||
@@ -594,12 +595,46 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
|
||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr,
|
||||
*RepR, *RepR, RepR->getDebugLoc());
|
||||
} else {
|
||||
assert(isa<VPInstruction>(DefR) &&
|
||||
"DefR must be a VPReplicateRecipe or VPInstruction");
|
||||
New = DefR->clone();
|
||||
for (const auto &[Idx, Op] : enumerate(NewOps)) {
|
||||
New->setOperand(Idx, Op);
|
||||
}
|
||||
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(New)) {
|
||||
// Skip lane 0: an absent start index is implicitly zero.
|
||||
unsigned KnownLane = Lane.getKnownLane();
|
||||
if (KnownLane != 0) {
|
||||
VPTypeAnalysis TypeInfo(Plan);
|
||||
Type *BaseIVTy = TypeInfo.inferScalarType(DefR->getOperand(0));
|
||||
|
||||
VPValue *StartIndex = Steps->getStartIndex();
|
||||
VPValue *LaneOffset;
|
||||
unsigned AddOp;
|
||||
VPIRFlags Flags;
|
||||
if (BaseIVTy->isFloatingPointTy()) {
|
||||
int SignedLane = static_cast<int>(KnownLane);
|
||||
if (!StartIndex && Steps->getInductionOpcode() == Instruction::FSub)
|
||||
SignedLane = -SignedLane;
|
||||
LaneOffset =
|
||||
Plan.getOrAddLiveIn(ConstantFP::get(BaseIVTy, SignedLane));
|
||||
AddOp = Steps->getInductionOpcode();
|
||||
Flags = VPIRFlags(FastMathFlags());
|
||||
} else {
|
||||
unsigned BaseIVBits = BaseIVTy->getScalarSizeInBits();
|
||||
LaneOffset = Plan.getConstantInt(APInt(BaseIVBits, KnownLane,
|
||||
/*isSigned*/ false,
|
||||
/*implicitTrunc*/ true));
|
||||
AddOp = Instruction::Add;
|
||||
Flags = VPIRFlags(VPIRFlags::WrapFlagsTy(false, false));
|
||||
}
|
||||
|
||||
if (StartIndex) {
|
||||
VPBuilder LaneBuilder(DefR);
|
||||
LaneOffset =
|
||||
LaneBuilder.createNaryOp(AddOp, {StartIndex, LaneOffset}, Flags);
|
||||
}
|
||||
Steps->setStartIndex(LaneOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
New->insertBefore(DefR);
|
||||
return New;
|
||||
@@ -629,7 +664,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
|
||||
SmallVector<VPRecipeBase *> ToRemove;
|
||||
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
|
||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
|
||||
if (!isa<VPInstruction, VPReplicateRecipe>(&R) ||
|
||||
if (!isa<VPInstruction, VPReplicateRecipe, VPScalarIVStepsRecipe>(&R) ||
|
||||
(isa<VPReplicateRecipe>(&R) &&
|
||||
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
|
||||
(isa<VPInstruction>(&R) &&
|
||||
|
||||
@@ -293,6 +293,13 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (const auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
|
||||
unsigned NumOps = ScalarIVSteps->getNumOperands();
|
||||
if (NumOps != 3 && NumOps != 4) {
|
||||
errs() << "VPScalarIVStepsRecipe must have 3 or 4 operands\n";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto *IRBB = dyn_cast<VPIRBasicBlock>(VPBB);
|
||||
|
||||
@@ -16,11 +16,10 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 0
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> [[TMP1]], ptr [[NEXT_GEP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP3]], i32 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP4]], i32 3
|
||||
@@ -28,7 +27,7 @@ define void @low_trip_count_small(i32 %x, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 1
|
||||
; CHECK-NEXT: store i8 0, ptr [[TMP7]], align 1
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
@@ -94,11 +93,10 @@ define ptr @low_trip_count_small_with_live_out(i32 %x, ptr %dst) {
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr [[DST]], i64 0
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 1
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 2
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[PTR]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x ptr> poison, ptr [[DST]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> [[TMP2]], ptr [[NEXT_GEP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> [[TMP3]], ptr [[NEXT_GEP3]], i32 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x ptr> [[TMP4]], ptr [[NEXT_GEP4]], i32 3
|
||||
@@ -106,7 +104,7 @@ define ptr @low_trip_count_small_with_live_out(i32 %x, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[PTR_NEXT:%.*]] = getelementptr i8, ptr [[DST]], i64 1
|
||||
; CHECK-NEXT: store i8 0, ptr [[PTR_NEXT]], align 1
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
|
||||
@@ -687,7 +687,6 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) {
|
||||
; COST1-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; COST1: [[VECTOR_BODY]]:
|
||||
; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; COST1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; COST1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; COST1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
|
||||
; COST1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -696,7 +695,6 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) {
|
||||
; COST1-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 6
|
||||
; COST1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 7
|
||||
; COST1-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 96
|
||||
; COST1-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; COST1-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 96
|
||||
; COST1-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 192
|
||||
; COST1-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 288
|
||||
@@ -704,7 +702,7 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) {
|
||||
; COST1-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 480
|
||||
; COST1-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 576
|
||||
; COST1-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 672
|
||||
; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP9]]
|
||||
; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
|
||||
; COST1-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
|
||||
; COST1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
|
||||
; COST1-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
|
||||
@@ -712,7 +710,7 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) {
|
||||
; COST1-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
|
||||
; COST1-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
|
||||
; COST1-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
|
||||
; COST1-NEXT: [[TMP17:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP1]]
|
||||
; COST1-NEXT: [[TMP17:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[INDEX]]
|
||||
; COST1-NEXT: [[TMP18:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP2]]
|
||||
; COST1-NEXT: [[TMP19:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP3]]
|
||||
; COST1-NEXT: [[TMP20:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP4]]
|
||||
@@ -825,20 +823,18 @@ define void @force_branch_cost(ptr readonly %src, ptr %dst) {
|
||||
; COST10-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; COST10: [[VECTOR_BODY]]:
|
||||
; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; COST10-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; COST10-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; COST10-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
|
||||
; COST10-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
; COST10-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 96
|
||||
; COST10-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; COST10-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96
|
||||
; COST10-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 192
|
||||
; COST10-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 288
|
||||
; COST10-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
|
||||
; COST10-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
|
||||
; COST10-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
|
||||
; COST10-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
|
||||
; COST10-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
|
||||
; COST10-NEXT: [[TMP9:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP1]]
|
||||
; COST10-NEXT: [[TMP9:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[INDEX]]
|
||||
; COST10-NEXT: [[TMP10:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP2]]
|
||||
; COST10-NEXT: [[TMP11:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP3]]
|
||||
; COST10-NEXT: [[TMP12:%.*]] = getelementptr [4 x i8], ptr [[SRC]], i64 [[TMP4]]
|
||||
|
||||
@@ -28,7 +28,6 @@ define float @_Z4testmm(i64 %size, i64 %offset) {
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP107:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP148:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP149:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -36,7 +35,7 @@ define float @_Z4testmm(i64 %size, i64 %offset) {
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP155:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[IV]], [[OFFSET]]
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[INDEX]], [[OFFSET]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], [[OFFSET]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP16]], [[OFFSET]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP24]], [[OFFSET]]
|
||||
@@ -76,25 +75,25 @@ define float @_Z4testmm(i64 %size, i64 %offset) {
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP41]], i32 1
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP42]], i32 2
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP43]], i32 3
|
||||
; CHECK-NEXT: [[GEP_KERNEL:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_KERNEL:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[GEP_KERNEL]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_KERNEL]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = fmul fast <4 x float> [[TMP39]], [[WIDE_LOAD]]
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = fmul fast <4 x float> [[TMP47]], [[WIDE_LOAD6]]
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP52]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP52]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP53]], align 4
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = fmul fast <4 x float> [[TMP50]], [[WIDE_LOAD7]]
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = fmul fast <4 x float> [[TMP51]], [[WIDE_LOAD8]]
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP56]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
|
||||
; CHECK-NEXT: [[TMP58:%.*]] = fmul fast <4 x float> [[TMP54]], [[WIDE_LOAD9]]
|
||||
; CHECK-NEXT: [[TMP59:%.*]] = fmul fast <4 x float> [[TMP55]], [[WIDE_LOAD10]]
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP60]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP61]], align 4
|
||||
|
||||
@@ -20,9 +20,8 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
|
||||
@@ -437,7 +437,6 @@ define i32 @load_from_pointer_induction(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -445,7 +444,7 @@ define i32 @load_from_pointer_induction(ptr %start, ptr %end) {
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP31]]
|
||||
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
|
||||
@@ -776,7 +775,6 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP1]], 6
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 9
|
||||
@@ -792,7 +790,7 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP1]], 39
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP1]], 42
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP1]], 45
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
|
||||
@@ -898,7 +896,6 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n
|
||||
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX19:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT28:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX19]], 3
|
||||
; CHECK-NEXT: [[TMP90:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP92:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -906,7 +903,7 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n
|
||||
; CHECK-NEXT: [[TMP95:%.*]] = add i64 [[OFFSET_IDX]], 15
|
||||
; CHECK-NEXT: [[TMP96:%.*]] = add i64 [[OFFSET_IDX]], 18
|
||||
; CHECK-NEXT: [[TMP97:%.*]] = add i64 [[OFFSET_IDX]], 21
|
||||
; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP90]]
|
||||
; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP91]]
|
||||
; CHECK-NEXT: [[NEXT_GEP22:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP92]]
|
||||
; CHECK-NEXT: [[NEXT_GEP23:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP93]]
|
||||
|
||||
@@ -21,11 +21,10 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP3]]
|
||||
@@ -45,7 +44,7 @@ define void @pr58722_load_interleave_group(ptr %src, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i24> [[TMP19]], i24 [[TMP16]], i32 3
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i24> [[TMP20]] to <4 x i32>
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
|
||||
|
||||
@@ -174,11 +174,10 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
|
||||
@@ -186,7 +185,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
|
||||
|
||||
@@ -127,7 +127,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
|
||||
; CHECK-INTERLEAVE1: vector.body:
|
||||
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -143,10 +142,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -216,7 +215,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
|
||||
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -248,13 +246,13 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
|
||||
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -375,7 +373,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
|
||||
; CHECK-MAXBW: vector.body:
|
||||
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -391,10 +388,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) {
|
||||
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1
|
||||
; CHECK-MAXBW-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
|
||||
; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
|
||||
@@ -412,7 +412,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
|
||||
; CHECK-INTERLEAVE1: vector.body:
|
||||
; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP69:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -428,10 +427,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -501,7 +500,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
|
||||
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP137:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -533,13 +531,13 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
|
||||
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -660,7 +658,6 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
|
||||
; CHECK-MAXBW: vector.body:
|
||||
; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-MAXBW-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP138:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -676,10 +673,10 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 {
|
||||
; CHECK-MAXBW-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-MAXBW-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-MAXBW-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-MAXBW-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1
|
||||
; CHECK-MAXBW-NEXT: [[TMP36:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
|
||||
; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP38:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP39:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-MAXBW-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
|
||||
@@ -354,7 +354,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -370,7 +369,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i64 [[N]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = sub nsw i64 [[N]], [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = sub nsw i64 [[N]], [[TMP3]]
|
||||
@@ -403,7 +402,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x i64> [[TMP45]], i64 [[TMP30]], i32 14
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <16 x i64> [[TMP46]], i64 [[TMP31]], i32 15
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = trunc <16 x i64> [[TMP47]] to <16 x i8>
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[TMP0]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = add i64 [[INDEX]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP49]]
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i8> [[TMP48]], i32 15
|
||||
; CHECK-NEXT: store i8 [[TMP51]], ptr [[TMP50]], align 1
|
||||
@@ -419,7 +418,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
|
||||
; CHECK: vec.epilog.vector.body:
|
||||
; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = add i64 [[INDEX2]], 0
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[INDEX2]], 1
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[INDEX2]], 2
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[INDEX2]], 3
|
||||
@@ -427,7 +425,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP58:%.*]] = add i64 [[INDEX2]], 5
|
||||
; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[INDEX2]], 6
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = add i64 [[INDEX2]], 7
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[TMP53]]
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = sub nsw i64 [[N]], [[INDEX2]]
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = sub nsw i64 [[N]], [[TMP54]]
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = sub nsw i64 [[N]], [[TMP55]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = sub nsw i64 [[N]], [[TMP56]]
|
||||
@@ -444,7 +442,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP75:%.*]] = insertelement <8 x i64> [[TMP74]], i64 [[TMP67]], i32 6
|
||||
; CHECK-NEXT: [[TMP76:%.*]] = insertelement <8 x i64> [[TMP75]], i64 [[TMP68]], i32 7
|
||||
; CHECK-NEXT: [[TMP77:%.*]] = trunc <8 x i64> [[TMP76]] to <8 x i8>
|
||||
; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[TMP53]], [[TMP61]]
|
||||
; CHECK-NEXT: [[TMP78:%.*]] = add i64 [[INDEX2]], [[TMP61]]
|
||||
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
|
||||
; CHECK-NEXT: [[TMP80:%.*]] = extractelement <8 x i8> [[TMP77]], i32 7
|
||||
; CHECK-NEXT: store i8 [[TMP80]], ptr [[TMP79]], align 1
|
||||
@@ -478,7 +476,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; IC2-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; IC2: vector.body:
|
||||
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -510,7 +507,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; IC2-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
|
||||
; IC2-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
|
||||
; IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
|
||||
; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[TMP0]]
|
||||
; IC2-NEXT: [[TMP32:%.*]] = sub nsw i64 [[N:%.*]], [[INDEX]]
|
||||
; IC2-NEXT: [[TMP33:%.*]] = sub nsw i64 [[N]], [[TMP1]]
|
||||
; IC2-NEXT: [[TMP34:%.*]] = sub nsw i64 [[N]], [[TMP2]]
|
||||
; IC2-NEXT: [[TMP35:%.*]] = sub nsw i64 [[N]], [[TMP3]]
|
||||
@@ -576,7 +573,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; IC2-NEXT: [[TMP95:%.*]] = insertelement <16 x i64> [[TMP94]], i64 [[TMP79]], i32 15
|
||||
; IC2-NEXT: [[TMP96:%.*]] = trunc <16 x i64> [[TMP63]] to <16 x i8>
|
||||
; IC2-NEXT: [[TMP97:%.*]] = trunc <16 x i64> [[TMP95]] to <16 x i8>
|
||||
; IC2-NEXT: [[TMP98:%.*]] = add i64 [[TMP0]], [[TMP32]]
|
||||
; IC2-NEXT: [[TMP98:%.*]] = add i64 [[INDEX]], [[TMP32]]
|
||||
; IC2-NEXT: [[TMP99:%.*]] = add i64 [[TMP16]], [[TMP64]]
|
||||
; IC2-NEXT: [[TMP100:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP98]]
|
||||
; IC2-NEXT: [[TMP101:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP99]]
|
||||
@@ -596,7 +593,6 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; IC2-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
|
||||
; IC2: vec.epilog.vector.body:
|
||||
; IC2-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; IC2-NEXT: [[TMP105:%.*]] = add i64 [[INDEX1]], 0
|
||||
; IC2-NEXT: [[TMP106:%.*]] = add i64 [[INDEX1]], 1
|
||||
; IC2-NEXT: [[TMP107:%.*]] = add i64 [[INDEX1]], 2
|
||||
; IC2-NEXT: [[TMP108:%.*]] = add i64 [[INDEX1]], 3
|
||||
@@ -604,7 +600,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; IC2-NEXT: [[TMP110:%.*]] = add i64 [[INDEX1]], 5
|
||||
; IC2-NEXT: [[TMP111:%.*]] = add i64 [[INDEX1]], 6
|
||||
; IC2-NEXT: [[TMP112:%.*]] = add i64 [[INDEX1]], 7
|
||||
; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[TMP105]]
|
||||
; IC2-NEXT: [[TMP113:%.*]] = sub nsw i64 [[N]], [[INDEX1]]
|
||||
; IC2-NEXT: [[TMP114:%.*]] = sub nsw i64 [[N]], [[TMP106]]
|
||||
; IC2-NEXT: [[TMP115:%.*]] = sub nsw i64 [[N]], [[TMP107]]
|
||||
; IC2-NEXT: [[TMP116:%.*]] = sub nsw i64 [[N]], [[TMP108]]
|
||||
@@ -621,7 +617,7 @@ define void @test_loop2(i64 %n, ptr %dst) {
|
||||
; IC2-NEXT: [[TMP127:%.*]] = insertelement <8 x i64> [[TMP126]], i64 [[TMP119]], i32 6
|
||||
; IC2-NEXT: [[TMP128:%.*]] = insertelement <8 x i64> [[TMP127]], i64 [[TMP120]], i32 7
|
||||
; IC2-NEXT: [[TMP129:%.*]] = trunc <8 x i64> [[TMP128]] to <8 x i8>
|
||||
; IC2-NEXT: [[TMP130:%.*]] = add i64 [[TMP105]], [[TMP113]]
|
||||
; IC2-NEXT: [[TMP130:%.*]] = add i64 [[INDEX1]], [[TMP113]]
|
||||
; IC2-NEXT: [[TMP131:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP130]]
|
||||
; IC2-NEXT: [[TMP132:%.*]] = extractelement <8 x i8> [[TMP129]], i32 7
|
||||
; IC2-NEXT: store i8 [[TMP132]], ptr [[TMP131]], align 1
|
||||
|
||||
@@ -234,7 +234,6 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -242,7 +241,7 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i128, ptr [[DST]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i128, ptr [[DST]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i128, ptr [[DST]], i64 [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x ptr> [[TMP26]], ptr [[TMP6]], i32 1
|
||||
@@ -266,7 +265,7 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
|
||||
; CHECK-NEXT: store ptr null, ptr [[TMP10]], align 8
|
||||
; CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8
|
||||
; CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 2
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 4
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 6
|
||||
@@ -590,9 +589,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 1, i32 4>
|
||||
@@ -602,10 +600,10 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
|
||||
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
|
||||
@@ -710,7 +708,6 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -726,7 +723,7 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]]
|
||||
|
||||
@@ -554,9 +554,8 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi double [ 3.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr [[T:%.*]], ptr [[SRC_0]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x double>, ptr [[GEP_0]], align 8
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 0, i32 3>
|
||||
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x double> [[WIDE_VEC]], <6 x double> poison, <2 x i32> <i32 1, i32 4>
|
||||
@@ -566,10 +565,10 @@ define double @test_scalarization_cost_for_load_of_address(ptr %src.0, ptr %src.
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[STRIDED_VEC2]], splat (double 3.000000e+00)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> [[TMP3]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC_1]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[GEP_SRC]], align 8
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP7]], [[WIDE_LOAD]]
|
||||
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[T_2:%.*]], ptr [[SRC_2]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [[T_2]], ptr [[SRC_2]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[GEP_72:%.*]] = getelementptr i8, ptr [[GEP_SRC_2]], i64 72
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP11]], i64 72
|
||||
@@ -674,7 +673,6 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -690,7 +688,7 @@ define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP3]]
|
||||
|
||||
@@ -736,12 +736,11 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
|
||||
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; VF2: [[VECTOR_BODY]]:
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
|
||||
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i32>, ptr [[TMP2]], align 8
|
||||
; VF2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[WIDE_VEC]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
|
||||
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
|
||||
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0
|
||||
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
|
||||
; VF2-NEXT: [[WIDE_VEC1:%.*]] = load <6 x i32>, ptr [[TMP8]], align 8
|
||||
; VF2-NEXT: [[TMP13:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
|
||||
@@ -751,7 +750,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
|
||||
; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8
|
||||
; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
|
||||
; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]]
|
||||
; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0
|
||||
@@ -773,14 +772,13 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
|
||||
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; VF4: [[VECTOR_BODY]]:
|
||||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]]
|
||||
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP4]], align 8
|
||||
; VF4-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 0
|
||||
; VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 0
|
||||
; VF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 0
|
||||
; VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 0
|
||||
; VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 0
|
||||
@@ -796,7 +794,7 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
|
||||
; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8
|
||||
; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8
|
||||
; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8
|
||||
; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
|
||||
; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[INDEX]], i32 1
|
||||
; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
|
||||
; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1
|
||||
; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1
|
||||
|
||||
@@ -13,18 +13,17 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -84,14 +83,13 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[C]], align 4
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i16>
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[WIDE_LOAD]], [[TMP6]]
|
||||
@@ -99,7 +97,7 @@ define void @test1(ptr noalias %M3, ptr noalias %A, ptr noalias %B, ptr noalias
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
|
||||
@@ -745,11 +745,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
|
||||
; RV64: [[VECTOR_BODY]]:
|
||||
; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
|
||||
; RV64-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
|
||||
; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
|
||||
; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1
|
||||
; RV64-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
||||
; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1
|
||||
; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1
|
||||
; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
|
||||
@@ -796,11 +795,10 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
|
||||
; RV32: [[VECTOR_BODY]]:
|
||||
; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
|
||||
; RV32-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
|
||||
; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
|
||||
; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP0]], -1
|
||||
; RV32-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
||||
; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1
|
||||
; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1
|
||||
; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
|
||||
@@ -847,7 +845,6 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
|
||||
; RV64-UF2: [[VECTOR_BODY]]:
|
||||
; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
|
||||
; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; RV64-UF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; RV64-UF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2
|
||||
; RV64-UF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3
|
||||
@@ -855,7 +852,7 @@ define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) {
|
||||
; RV64-UF2-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -5
|
||||
; RV64-UF2-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], -6
|
||||
; RV64-UF2-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -7
|
||||
; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP0]], -1
|
||||
; RV64-UF2-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
||||
; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP1]], -1
|
||||
; RV64-UF2-NEXT: [[TMP10:%.*]] = add nsw i64 [[TMP2]], -1
|
||||
; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP3]], -1
|
||||
|
||||
@@ -586,9 +586,6 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
|
||||
; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; FIXEDLEN: [[VECTOR_BODY]]:
|
||||
; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
|
||||
; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 5
|
||||
; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6
|
||||
; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7
|
||||
; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8
|
||||
; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
|
||||
|
||||
@@ -20,13 +20,14 @@ define void @func_21() {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
|
||||
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i8> [[VEC_IND]], splat (i8 4)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
@@ -45,7 +46,7 @@ define void @func_21() {
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; CHECK: pred.store.if:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
|
||||
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
|
||||
@@ -109,15 +109,14 @@ define void @test(ptr noalias %src, ptr noalias %dst) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @__simd_sin_v2f64(<2 x double> [[TMP4]])
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP6]], align 8
|
||||
; CHECK-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
|
||||
|
||||
@@ -20,11 +20,9 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: vector.body:
|
||||
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, vector.ph ], [ vp<[[CAN_IV_NEXT:%.+]]>, default.2 ]
|
||||
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>
|
||||
; CHECK-NEXT: EMIT vp<[[STEP1:%.+]]> = extractelement vp<[[STEPS]]>, ir<0>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[STEP1]]>
|
||||
; CHECK-NEXT: EMIT vp<[[STEP2:%.+]]> = extractelement vp<[[STEPS]]>, ir<1>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEP2]]>
|
||||
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, ir<2>, ir<1>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR:%.+]]> = ptradd ir<%start>, vp<[[CAN_IV]]>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR]]>.1 = ptradd ir<%start>, vp<[[STEPS]]>
|
||||
; CHECK-NEXT: EMIT vp<[[PTR_VEC:%.+]]> = buildvector vp<[[PTR]]>, vp<[[PTR]]>.1
|
||||
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[PTR]]>
|
||||
; CHECK-NEXT: EMIT vp<[[C1:%.+]]> = icmp eq ir<%l>, ir<-12>
|
||||
|
||||
@@ -66,7 +66,6 @@ define void @PR31671(float %x, ptr %d) #0 {
|
||||
; FORCE: [[VECTOR_BODY]]:
|
||||
; FORCE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; FORCE-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
|
||||
; FORCE-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; FORCE-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5
|
||||
; FORCE-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10
|
||||
; FORCE-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15
|
||||
@@ -74,7 +73,7 @@ define void @PR31671(float %x, ptr %d) #0 {
|
||||
; FORCE-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 25
|
||||
; FORCE-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; FORCE-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 35
|
||||
; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[TMP0]]
|
||||
; FORCE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[DATA:%.*]], ptr [[D]], i64 0, i32 3, i64 [[OFFSET_IDX]]
|
||||
; FORCE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP2]]
|
||||
; FORCE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP4]]
|
||||
; FORCE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 3, i64 [[TMP6]]
|
||||
@@ -90,7 +89,7 @@ define void @PR31671(float %x, ptr %d) #0 {
|
||||
; FORCE-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC2]]
|
||||
; FORCE-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC4]]
|
||||
; FORCE-NEXT: [[TMP15:%.*]] = fmul <2 x float> [[BROADCAST_SPLAT]], [[STRIDED_VEC6]]
|
||||
; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP0]]
|
||||
; FORCE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[OFFSET_IDX]]
|
||||
; FORCE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP1]]
|
||||
; FORCE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP2]]
|
||||
; FORCE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[DATA]], ptr [[D]], i64 0, i32 0, i64 [[TMP3]]
|
||||
|
||||
@@ -92,7 +92,6 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP121:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP122:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 32
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 64
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 96
|
||||
@@ -108,7 +107,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 416
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 448
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 480
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
|
||||
@@ -156,7 +155,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x float> [[TMP63]], float [[TMP60]], i32 1
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x float> [[TMP64]], float [[TMP61]], i32 2
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <4 x float> [[TMP65]], float [[TMP62]], i32 3
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
|
||||
@@ -238,11 +237,10 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
|
||||
; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP125]], %[[VEC_EPILOG_PH]] ], [ [[TMP155:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX12:%.*]] = mul i64 [[INDEX10]], 32
|
||||
; CHECK-NEXT: [[TMP126:%.*]] = add i64 [[OFFSET_IDX12]], 0
|
||||
; CHECK-NEXT: [[TMP127:%.*]] = add i64 [[OFFSET_IDX12]], 32
|
||||
; CHECK-NEXT: [[TMP128:%.*]] = add i64 [[OFFSET_IDX12]], 64
|
||||
; CHECK-NEXT: [[TMP129:%.*]] = add i64 [[OFFSET_IDX12]], 96
|
||||
; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP126]]
|
||||
; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[OFFSET_IDX12]]
|
||||
; CHECK-NEXT: [[TMP131:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP127]]
|
||||
; CHECK-NEXT: [[TMP132:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP128]]
|
||||
; CHECK-NEXT: [[TMP133:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP129]]
|
||||
@@ -254,7 +252,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
|
||||
; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x float> [[TMP138]], float [[TMP135]], i32 1
|
||||
; CHECK-NEXT: [[TMP140:%.*]] = insertelement <4 x float> [[TMP139]], float [[TMP136]], i32 2
|
||||
; CHECK-NEXT: [[TMP141:%.*]] = insertelement <4 x float> [[TMP140]], float [[TMP137]], i32 3
|
||||
; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP126]]
|
||||
; CHECK-NEXT: [[TMP142:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[OFFSET_IDX12]]
|
||||
; CHECK-NEXT: [[TMP143:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP127]]
|
||||
; CHECK-NEXT: [[TMP144:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP128]]
|
||||
; CHECK-NEXT: [[TMP145:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP129]]
|
||||
@@ -432,11 +430,10 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 {
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 40
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 40
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 80
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 120
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP9]]
|
||||
|
||||
@@ -287,12 +287,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 0, 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1, 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 2, 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 3, 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[INPUT]], i64 -1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP9]]
|
||||
@@ -300,11 +298,10 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x ptr> [[TMP14]], ptr [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
|
||||
; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8
|
||||
; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[PTRS]], align 8
|
||||
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr align 4 [[TMP10]], <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> poison), !invariant.load [[META0]]
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 0
|
||||
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4
|
||||
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[OUTPUT]], align 4
|
||||
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
;
|
||||
|
||||
@@ -354,7 +354,6 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -362,7 +361,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
|
||||
|
||||
@@ -27,7 +27,6 @@ define float @_Z4testmm(i64 %size, i64 %offset) {
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP148:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP149:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -35,7 +34,7 @@ define float @_Z4testmm(i64 %size, i64 %offset) {
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP0]], [[OFFSET:%.*]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[OFFSET:%.*]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP1]], [[OFFSET]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP2]], [[OFFSET]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], [[OFFSET]]
|
||||
@@ -75,25 +74,25 @@ define float @_Z4testmm(i64 %size, i64 %offset) {
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = insertelement <4 x float> [[TMP44]], float [[TMP41]], i32 1
|
||||
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP42]], i32 2
|
||||
; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP43]], i32 3
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [512 x float], ptr @kernel, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP48]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP49]], align 4
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = fmul fast <4 x float> [[TMP39]], [[WIDE_LOAD]]
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = fmul fast <4 x float> [[TMP47]], [[WIDE_LOAD6]]
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [512 x float], ptr @kernel2, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[TMP52]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP52]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP53]], align 4
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = fmul fast <4 x float> [[TMP50]], [[WIDE_LOAD7]]
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = fmul fast <4 x float> [[TMP51]], [[WIDE_LOAD8]]
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [512 x float], ptr @kernel3, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP56]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
|
||||
; CHECK-NEXT: [[TMP58:%.*]] = fmul fast <4 x float> [[TMP54]], [[WIDE_LOAD9]]
|
||||
; CHECK-NEXT: [[TMP59:%.*]] = fmul fast <4 x float> [[TMP55]], [[WIDE_LOAD10]]
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [512 x float], ptr @kernel4, i64 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP60]], align 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP61]], align 4
|
||||
|
||||
@@ -151,9 +151,10 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
|
||||
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
|
||||
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
|
||||
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
|
||||
; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
|
||||
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
@@ -166,7 +167,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
|
||||
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; FVW2: pred.store.if:
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
|
||||
; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
|
||||
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
@@ -266,9 +267,10 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
|
||||
; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
|
||||
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
|
||||
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
|
||||
; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
|
||||
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
@@ -281,7 +283,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
|
||||
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; FVW2: pred.store.if:
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[TMP0]], i32 1
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], i64 [[OFFSET_IDX]], i32 1
|
||||
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
|
||||
; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
|
||||
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
@@ -368,9 +370,10 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
|
||||
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
|
||||
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
|
||||
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
|
||||
; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
|
||||
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
@@ -383,7 +386,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
|
||||
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; FVW2: pred.store.if:
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
|
||||
; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4
|
||||
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
@@ -469,9 +472,10 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
|
||||
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
|
||||
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
|
||||
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
|
||||
; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
|
||||
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
@@ -484,7 +488,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
|
||||
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; FVW2: pred.store.if:
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
|
||||
; FVW2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4
|
||||
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
@@ -570,9 +574,10 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
|
||||
; FVW2-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
|
||||
; FVW2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 16>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE3]] ]
|
||||
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX1]], 16
|
||||
; FVW2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; FVW2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP18:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = insertelement <2 x i64> [[TMP18]], i64 [[TMP1]], i32 1
|
||||
; FVW2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP1]]
|
||||
; FVW2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; FVW2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
@@ -585,7 +590,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
|
||||
; FVW2-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; FVW2-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
|
||||
; FVW2: pred.store.if:
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[TMP0]]
|
||||
; FVW2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
|
||||
; FVW2-NEXT: store float [[TMP13]], ptr addrspace(1) [[TMP12]], align 4
|
||||
; FVW2-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
@@ -779,9 +784,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
|
||||
; FVW2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
|
||||
; FVW2-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX]]
|
||||
; FVW2-NEXT: [[OFFSET_IDX9:%.*]] = mul i64 [[INDEX]], 64
|
||||
; FVW2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX9]], 0
|
||||
; FVW2-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX9]], 64
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP17]]
|
||||
; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[OFFSET_IDX9]]
|
||||
; FVW2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]]
|
||||
; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]]
|
||||
; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP21]], align 4, !alias.scope [[META8:![0-9]+]]
|
||||
|
||||
@@ -255,7 +255,6 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2048>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -271,7 +270,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
|
||||
|
||||
@@ -110,9 +110,8 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX1]], 5
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = shl i64 [[TMP10]], 5
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP9]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[TMP12]]
|
||||
@@ -120,7 +119,7 @@ define void @geps_feeding_interleave_groups_with_reuse(ptr %arg, i64 %arg1, ptr
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = or disjoint i64 [[TMP12]], 16
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX1]], 4
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP26]], align 4, !alias.scope [[META3:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP13]], align 4, !alias.scope [[META3]]
|
||||
|
||||
@@ -16,7 +16,6 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 8
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 24
|
||||
@@ -32,7 +31,7 @@ define ptr @test_interleave_ptradd_with_replicated_op(ptr %m) #0 {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 104
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 112
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 120
|
||||
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[M]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[M]], i64 [[TMP3]]
|
||||
|
||||
@@ -18,17 +18,15 @@ define void @pr63602_1(ptr %arr) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 9
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP3]], 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP4]], 4
|
||||
@@ -40,7 +38,7 @@ define void @pr63602_1(ptr %arr) {
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP15]], align 4
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]]
|
||||
@@ -48,7 +46,7 @@ define void @pr63602_1(ptr %arr) {
|
||||
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP23]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP25]], align 4
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[TMP1]], 2
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[TMP2]], 2
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[TMP3]], 2
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP4]], 2
|
||||
@@ -154,17 +152,15 @@ define void @pr63602_2(ptr %arr) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 4, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX1]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 6
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 9
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP1]], 4
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw i64 [[TMP3]], 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add nuw nsw i64 [[TMP4]], 4
|
||||
@@ -176,7 +172,7 @@ define void @pr63602_2(ptr %arr) {
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP15]], align 4
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[OFFSET_IDX1]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP9]]
|
||||
@@ -184,7 +180,7 @@ define void @pr63602_2(ptr %arr) {
|
||||
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP23]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP25]], align 4
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[TMP1]], 2
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add nuw nsw i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = add nuw nsw i64 [[TMP2]], 2
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[TMP3]], 2
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = add nuw nsw i64 [[TMP4]], 2
|
||||
|
||||
@@ -100,13 +100,12 @@ define i64 @reverse_load_liveout_only(ptr %A) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 17, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP12]], -1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], -1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP3]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i64 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP13]], align 4
|
||||
|
||||
@@ -106,7 +106,6 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -122,7 +121,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -170,7 +169,7 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -241,7 +240,6 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP97:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP98:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP99:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -257,7 +255,7 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -396,23 +394,38 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP145:%.*]], [[PRED_LOAD_CONTINUE33]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP146:%.*]], [[PRED_LOAD_CONTINUE33]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP147:%.*]], [[PRED_LOAD_CONTINUE33]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i64> [[TMP64]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP74:%.*]] = insertelement <4 x i64> [[TMP69]], i64 [[TMP2]], i32 2
|
||||
; CHECK-NEXT: [[TMP79:%.*]] = insertelement <4 x i64> [[TMP74]], i64 [[TMP3]], i32 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP89:%.*]] = insertelement <4 x i64> [[TMP84]], i64 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i64> [[TMP89]], i64 [[TMP6]], i32 2
|
||||
; CHECK-NEXT: [[TMP99:%.*]] = insertelement <4 x i64> [[TMP94]], i64 [[TMP7]], i32 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 8
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 9
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 10
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 11
|
||||
; CHECK-NEXT: [[TMP104:%.*]] = insertelement <4 x i64> poison, i64 [[TMP8]], i32 0
|
||||
; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i64> [[TMP104]], i64 [[TMP9]], i32 1
|
||||
; CHECK-NEXT: [[TMP114:%.*]] = insertelement <4 x i64> [[TMP109]], i64 [[TMP10]], i32 2
|
||||
; CHECK-NEXT: [[TMP119:%.*]] = insertelement <4 x i64> [[TMP114]], i64 [[TMP11]], i32 3
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 12
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP124:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP129:%.*]] = insertelement <4 x i64> [[TMP124]], i64 [[TMP13]], i32 1
|
||||
; CHECK-NEXT: [[TMP134:%.*]] = insertelement <4 x i64> [[TMP129]], i64 [[TMP14]], i32 2
|
||||
; CHECK-NEXT: [[TMP139:%.*]] = insertelement <4 x i64> [[TMP134]], i64 [[TMP15]], i32 3
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -462,7 +475,7 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
@@ -653,7 +666,6 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -669,7 +681,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) {
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]]
|
||||
@@ -717,7 +729,7 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) {
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12
|
||||
@@ -812,7 +824,6 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1024, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
@@ -828,7 +839,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -876,7 +887,7 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -995,7 +1006,6 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -1011,7 +1021,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -1059,7 +1069,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
|
||||
@@ -1166,7 +1176,6 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1182,7 +1191,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -1230,7 +1239,7 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -1297,7 +1306,6 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1313,7 +1321,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -1361,7 +1369,7 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -1428,7 +1436,6 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1444,7 +1451,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -1492,7 +1499,7 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -1568,7 +1575,6 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP76:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1584,7 +1590,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP4]]
|
||||
@@ -1632,7 +1638,7 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 1
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8
|
||||
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12
|
||||
@@ -1728,7 +1734,6 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1744,7 +1749,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -1792,7 +1797,7 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -1860,7 +1865,6 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1876,7 +1880,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -1924,7 +1928,7 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -2002,7 +2006,6 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP73:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -2018,7 +2021,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 13
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 14
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 15
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -2066,7 +2069,7 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4
|
||||
; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8
|
||||
; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12
|
||||
@@ -2142,7 +2145,6 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -2158,7 +2160,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 39
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 42
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 45
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -2206,7 +2208,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
|
||||
@@ -2331,7 +2333,6 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP56:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP57:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 12
|
||||
@@ -2339,7 +2340,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 20
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -2363,7 +2364,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i1> [[TMP28]], i1 [[TMP25]], i32 1
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i1> [[TMP29]], i1 [[TMP26]], i32 2
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i1> [[TMP30]], i1 [[TMP27]], i32 3
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
|
||||
@@ -2460,7 +2461,6 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 10
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 15
|
||||
@@ -2476,7 +2476,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 65
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 70
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 75
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -2524,7 +2524,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
|
||||
@@ -2650,7 +2650,6 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP114:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -2666,7 +2665,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
@@ -2714,7 +2713,7 @@ define i32 @test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]]
|
||||
@@ -2842,7 +2841,6 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP130:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP131:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -2858,7 +2856,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP1]], 2
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP2]], 2
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP3]], 2
|
||||
@@ -2874,7 +2872,7 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP13]], 2
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP14]], 2
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP15]], 2
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i64 [[TMP3]]
|
||||
|
||||
@@ -21,9 +21,6 @@ define void @foo(ptr %ptr, ptr %ptr.2) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
|
||||
; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[INDEX]]
|
||||
|
||||
@@ -11,9 +11,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1
|
||||
@@ -35,7 +34,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
|
||||
; CHECK: [[PRED_LOAD_CONTINUE2]]:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
|
||||
@@ -58,7 +58,6 @@ define void @test(ptr %p) {
|
||||
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 1, i16 2, i16 3, i16 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
|
||||
; VEC-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
|
||||
; VEC-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1
|
||||
; VEC-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2
|
||||
; VEC-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -66,7 +65,7 @@ define void @test(ptr %p) {
|
||||
; VEC-NEXT: [[TMP32:%.*]] = add i64 [[INDEX]], 5
|
||||
; VEC-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 6
|
||||
; VEC-NEXT: [[TMP34:%.*]] = add i64 [[INDEX]], 7
|
||||
; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[TMP15]], 1
|
||||
; VEC-NEXT: [[TMP19:%.*]] = shl i64 [[INDEX]], 1
|
||||
; VEC-NEXT: [[TMP20:%.*]] = shl i64 [[TMP16]], 1
|
||||
; VEC-NEXT: [[TMP21:%.*]] = shl i64 [[TMP17]], 1
|
||||
; VEC-NEXT: [[TMP22:%.*]] = shl i64 [[TMP18]], 1
|
||||
|
||||
@@ -70,7 +70,6 @@ define void @simplify_udiv_4_in_replicate_region2(i8 %arg, ptr noalias %src, ptr
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE29:.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
|
||||
@@ -78,13 +77,13 @@ define void @simplify_udiv_4_in_replicate_region2(i8 %arg, ptr noalias %src, ptr
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 4
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
|
||||
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD1]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]]
|
||||
|
||||
@@ -28,7 +28,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I64-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4)
|
||||
; I64-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4)
|
||||
; I64-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4)
|
||||
; I64-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
|
||||
; I64-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; I64-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 2
|
||||
; I64-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 3
|
||||
@@ -64,7 +63,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1
|
||||
; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2
|
||||
; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3
|
||||
; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]]
|
||||
; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]]
|
||||
; I64-NEXT: [[TMP25:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
|
||||
; I64-NEXT: [[TMP26:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
|
||||
; I64-NEXT: [[TMP27:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP7]]
|
||||
@@ -134,7 +133,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I64: [[VEC_EPILOG_VECTOR_BODY]]:
|
||||
; I64-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; I64-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; I64-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 0
|
||||
; I64-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 1
|
||||
; I64-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 2
|
||||
; I64-NEXT: [[TMP78:%.*]] = add i32 [[INDEX4]], 3
|
||||
@@ -143,7 +141,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I64-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP79]], i32 1
|
||||
; I64-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP79]], i32 2
|
||||
; I64-NEXT: [[TMP91:%.*]] = extractelement <4 x double> [[TMP79]], i32 3
|
||||
; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
|
||||
; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]]
|
||||
; I64-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
|
||||
; I64-NEXT: [[TMP86:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
|
||||
; I64-NEXT: [[TMP93:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]]
|
||||
@@ -184,7 +182,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I32-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4)
|
||||
; I32-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4)
|
||||
; I32-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4)
|
||||
; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
|
||||
; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1
|
||||
; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2
|
||||
; I32-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 3
|
||||
@@ -220,7 +217,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1
|
||||
; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2
|
||||
; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3
|
||||
; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]]
|
||||
; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX]]
|
||||
; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]]
|
||||
; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
|
||||
; I32-NEXT: [[TMP18:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
|
||||
@@ -290,7 +287,6 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I32: [[VEC_EPILOG_VECTOR_BODY]]:
|
||||
; I32-NEXT: [[INDEX4:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; I32-NEXT: [[VEC_IND5:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; I32-NEXT: [[TMP74:%.*]] = add i32 [[INDEX4]], 0
|
||||
; I32-NEXT: [[TMP75:%.*]] = add i32 [[INDEX4]], 1
|
||||
; I32-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 2
|
||||
; I32-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 3
|
||||
@@ -299,7 +295,7 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
|
||||
; I32-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP78]], i32 1
|
||||
; I32-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP78]], i32 2
|
||||
; I32-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP78]], i32 3
|
||||
; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]]
|
||||
; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[INDEX4]]
|
||||
; I32-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
|
||||
; I32-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
|
||||
; I32-NEXT: [[TMP92:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
|
||||
@@ -352,11 +348,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
|
||||
; I64-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; I64: [[VECTOR_BODY]]:
|
||||
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
|
||||
; I64-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
|
||||
; I64-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
|
||||
; I64-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
|
||||
; I64-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
|
||||
@@ -364,7 +359,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
|
||||
; I64-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
|
||||
; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
|
||||
; I64-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
|
||||
; I64-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
|
||||
; I64-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1
|
||||
; I64-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
|
||||
; I64-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
|
||||
; I64-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
|
||||
@@ -399,11 +394,10 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
|
||||
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; I32: [[VECTOR_BODY]]:
|
||||
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP0]]
|
||||
; I32-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDEX]]
|
||||
; I32-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
|
||||
; I32-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP2]]
|
||||
; I32-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
|
||||
@@ -411,7 +405,7 @@ define void @test_store_loaded_value(ptr noalias %src, ptr noalias %dst, i32 %n)
|
||||
; I32-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8
|
||||
; I32-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8
|
||||
; I32-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8
|
||||
; I32-NEXT: [[TMP12:%.*]] = shl i64 [[TMP0]], 1
|
||||
; I32-NEXT: [[TMP12:%.*]] = shl i64 [[INDEX]], 1
|
||||
; I32-NEXT: [[TMP13:%.*]] = shl i64 [[TMP1]], 1
|
||||
; I32-NEXT: [[TMP14:%.*]] = shl i64 [[TMP2]], 1
|
||||
; I32-NEXT: [[TMP15:%.*]] = shl i64 [[TMP3]], 1
|
||||
@@ -708,7 +702,6 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
|
||||
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; I32: [[VECTOR_BODY]]:
|
||||
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; I32-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; I32-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
|
||||
; I32-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -716,7 +709,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
|
||||
; I32-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 5
|
||||
; I32-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6
|
||||
; I32-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7
|
||||
; I32-NEXT: [[TMP11:%.*]] = add i64 [[TMP3]], 1
|
||||
; I32-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
|
||||
; I32-NEXT: [[TMP12:%.*]] = add i64 [[TMP4]], 1
|
||||
; I32-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], 1
|
||||
; I32-NEXT: [[TMP14:%.*]] = add i64 [[TMP6]], 1
|
||||
@@ -790,7 +783,7 @@ define void @loaded_address_used_by_load_through_blend(i64 %start, ptr noalias %
|
||||
; I32-NEXT: [[TMP88:%.*]] = load float, ptr [[TMP87]], align 4
|
||||
; I32-NEXT: [[TMP90:%.*]] = load float, ptr [[TMP89]], align 4
|
||||
; I32-NEXT: [[TMP92:%.*]] = load float, ptr [[TMP91]], align 4
|
||||
; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
|
||||
; I32-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
|
||||
; I32-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
|
||||
; I32-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
|
||||
; I32-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
|
||||
@@ -856,7 +849,6 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s
|
||||
; I64-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; I64: [[VECTOR_BODY]]:
|
||||
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I64-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; I64-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; I64-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; I64-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -864,7 +856,7 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s
|
||||
; I64-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
|
||||
; I64-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
|
||||
; I64-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
|
||||
; I64-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
|
||||
; I64-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], [[OFFSET]]
|
||||
; I64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
|
||||
; I64-NEXT: [[TMP10:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
|
||||
; I64-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
|
||||
@@ -973,11 +965,10 @@ define void @address_use_in_different_block(ptr noalias %dst, ptr %src.0, ptr %s
|
||||
; I32-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; I32: [[VECTOR_BODY]]:
|
||||
; I32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I32-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; I32-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; I32-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; I32-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; I32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP0]], [[OFFSET]]
|
||||
; I32-NEXT: [[TMP4:%.*]] = mul i64 [[INDEX]], [[OFFSET]]
|
||||
; I32-NEXT: [[TMP5:%.*]] = mul i64 [[TMP1]], [[OFFSET]]
|
||||
; I32-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], [[OFFSET]]
|
||||
; I32-NEXT: [[TMP7:%.*]] = mul i64 [[TMP3]], [[OFFSET]]
|
||||
@@ -1096,8 +1087,6 @@ define void @replicated_load_wide_store_derived_iv_zext_and(ptr noalias %src, pt
|
||||
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I64-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
|
||||
; I64-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
|
||||
; I64-NEXT: [[TMP15:%.*]] = mul i32 0, [[STEP]]
|
||||
; I64-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], [[TMP15]]
|
||||
; I64-NEXT: [[TMP17:%.*]] = mul i32 1, [[STEP]]
|
||||
; I64-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], [[TMP17]]
|
||||
; I64-NEXT: [[TMP19:%.*]] = mul i32 2, [[STEP]]
|
||||
@@ -1112,7 +1101,7 @@ define void @replicated_load_wide_store_derived_iv_zext_and(ptr noalias %src, pt
|
||||
; I64-NEXT: [[TMP28:%.*]] = add i32 [[OFFSET_IDX]], [[TMP27]]
|
||||
; I64-NEXT: [[TMP29:%.*]] = mul i32 7, [[STEP]]
|
||||
; I64-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], [[TMP29]]
|
||||
; I64-NEXT: [[TMP31:%.*]] = zext i32 [[TMP16]] to i64
|
||||
; I64-NEXT: [[TMP31:%.*]] = zext i32 [[OFFSET_IDX]] to i64
|
||||
; I64-NEXT: [[TMP32:%.*]] = zext i32 [[TMP18]] to i64
|
||||
; I64-NEXT: [[TMP33:%.*]] = zext i32 [[TMP20]] to i64
|
||||
; I64-NEXT: [[TMP34:%.*]] = zext i32 [[TMP22]] to i64
|
||||
@@ -1220,8 +1209,6 @@ define void @replicated_load_wide_store_derived_iv_zext_and2(ptr noalias %dst, p
|
||||
; I64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; I64-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
|
||||
; I64-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
|
||||
; I64-NEXT: [[TMP7:%.*]] = mul i32 0, [[STEP]]
|
||||
; I64-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], [[TMP7]]
|
||||
; I64-NEXT: [[TMP9:%.*]] = mul i32 1, [[STEP]]
|
||||
; I64-NEXT: [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], [[TMP9]]
|
||||
; I64-NEXT: [[TMP11:%.*]] = mul i32 2, [[STEP]]
|
||||
@@ -1236,7 +1223,7 @@ define void @replicated_load_wide_store_derived_iv_zext_and2(ptr noalias %dst, p
|
||||
; I64-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], [[TMP19]]
|
||||
; I64-NEXT: [[TMP21:%.*]] = mul i32 7, [[STEP]]
|
||||
; I64-NEXT: [[TMP22:%.*]] = add i32 [[OFFSET_IDX]], [[TMP21]]
|
||||
; I64-NEXT: [[TMP23:%.*]] = zext i32 [[TMP8]] to i64
|
||||
; I64-NEXT: [[TMP23:%.*]] = zext i32 [[OFFSET_IDX]] to i64
|
||||
; I64-NEXT: [[TMP24:%.*]] = zext i32 [[TMP10]] to i64
|
||||
; I64-NEXT: [[TMP25:%.*]] = zext i32 [[TMP12]] to i64
|
||||
; I64-NEXT: [[TMP26:%.*]] = zext i32 [[TMP14]] to i64
|
||||
|
||||
@@ -26,7 +26,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -58,7 +57,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
|
||||
@@ -66,7 +65,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
|
||||
@@ -193,13 +192,12 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0
|
||||
; CHECK-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1
|
||||
; CHECK-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2
|
||||
; CHECK-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3
|
||||
; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]]
|
||||
; CHECK-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]]
|
||||
; CHECK-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]]
|
||||
@@ -257,7 +255,6 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; MAX-BW-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP145:%.*]], %[[VECTOR_BODY]] ]
|
||||
; MAX-BW-NEXT: [[VEC_PHI2:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP146:%.*]], %[[VECTOR_BODY]] ]
|
||||
; MAX-BW-NEXT: [[VEC_PHI3:%.*]] = phi <8 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP147:%.*]], %[[VECTOR_BODY]] ]
|
||||
; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -289,7 +286,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; MAX-BW-NEXT: [[TMP29:%.*]] = add i64 [[INDEX]], 29
|
||||
; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
|
||||
; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
|
||||
; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
|
||||
; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX]]
|
||||
; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
|
||||
; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
|
||||
; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
|
||||
@@ -297,7 +294,7 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP43:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
|
||||
@@ -424,13 +421,12 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
|
||||
; MAX-BW: [[VEC_EPILOG_VECTOR_BODY]]:
|
||||
; MAX-BW-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; MAX-BW-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i32> [ [[TMP171]], %[[VEC_EPILOG_PH]] ], [ [[TMP168:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; MAX-BW-NEXT: [[TMP172:%.*]] = add i64 [[INDEX9]], 0
|
||||
; MAX-BW-NEXT: [[TMP173:%.*]] = add i64 [[INDEX9]], 1
|
||||
; MAX-BW-NEXT: [[TMP174:%.*]] = add i64 [[INDEX9]], 2
|
||||
; MAX-BW-NEXT: [[TMP175:%.*]] = add i64 [[INDEX9]], 3
|
||||
; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP172]]
|
||||
; MAX-BW-NEXT: [[TMP152:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[INDEX9]]
|
||||
; MAX-BW-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i32>, ptr [[TMP152]], align 4, !tbaa [[INT_TBAA1]]
|
||||
; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP172]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP154:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[INDEX9]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP155:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP173]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP156:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP174]], i64 [[IDXPROM5]]
|
||||
; MAX-BW-NEXT: [[TMP157:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP175]], i64 [[IDXPROM5]]
|
||||
@@ -507,7 +503,6 @@ define void @test(ptr %A, ptr noalias %B) #0 {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -515,7 +510,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
|
||||
@@ -529,7 +524,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i8> [[TMP19]], i32 5
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i8> [[TMP19]], i32 6
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i8> [[TMP19]], i32 7
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]]
|
||||
@@ -562,7 +557,6 @@ define void @test(ptr %A, ptr noalias %B) #0 {
|
||||
; MAX-BW: [[VECTOR_BODY]]:
|
||||
; MAX-BW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; MAX-BW-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; MAX-BW-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; MAX-BW-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; MAX-BW-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; MAX-BW-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -578,7 +572,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
|
||||
; MAX-BW-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26
|
||||
; MAX-BW-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28
|
||||
; MAX-BW-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30
|
||||
; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[TMP0]]
|
||||
; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i64 0, i64 [[OFFSET_IDX]]
|
||||
; MAX-BW-NEXT: [[WIDE_VEC:%.*]] = load <32 x i32>, ptr [[TMP32]], align 4
|
||||
; MAX-BW-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
|
||||
; MAX-BW-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <32 x i32> [[WIDE_VEC]], <32 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
|
||||
@@ -600,7 +594,7 @@ define void @test(ptr %A, ptr noalias %B) #0 {
|
||||
; MAX-BW-NEXT: [[TMP65:%.*]] = extractelement <16 x i8> [[TMP35]], i32 13
|
||||
; MAX-BW-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[TMP35]], i32 14
|
||||
; MAX-BW-NEXT: [[TMP67:%.*]] = extractelement <16 x i8> [[TMP35]], i32 15
|
||||
; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP0]]
|
||||
; MAX-BW-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[OFFSET_IDX]]
|
||||
; MAX-BW-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP1]]
|
||||
; MAX-BW-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP2]]
|
||||
; MAX-BW-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1024 x i8], ptr [[B]], i64 0, i64 [[TMP3]]
|
||||
|
||||
@@ -198,9 +198,6 @@ define void @uniform_store_varying_value(ptr align(4) %addr) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 12
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP0]], 13
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP0]], 14
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], 15
|
||||
; CHECK-NEXT: store i32 [[TMP7]], ptr [[ADDR:%.*]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
||||
|
||||
@@ -16,11 +16,10 @@ define void @test(ptr %A) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shl nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP2]], 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 1
|
||||
|
||||
@@ -11,15 +11,13 @@ define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) {
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select i1 false, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = select i1 true, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = select i1 true, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr { float, float }, ptr [[FOO]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[FOO]], align 4
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP8]], align 4
|
||||
|
||||
@@ -12,11 +12,9 @@ define void @copy_bitcast_fusion(ptr noalias %foo, ptr noalias %bar) {
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = select i1 false, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = select i1 true, i64 1, i64 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[FOO]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[FOO]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP5]], i32 1
|
||||
|
||||
@@ -30,12 +30,11 @@ define void @example() {
|
||||
; FORCED: [[VECTOR_BODY]]:
|
||||
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; FORCED-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; FORCED-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; FORCED-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; FORCED-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[VEC_IND]] to <2 x x86_fp80>
|
||||
; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 0
|
||||
; FORCED-NEXT: [[TMP6:%.*]] = extractelement <2 x x86_fp80> [[TMP2]], i32 1
|
||||
; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP0]]
|
||||
; FORCED-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[INDEX]]
|
||||
; FORCED-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x x86_fp80], ptr @x, i64 0, i64 [[TMP1]]
|
||||
; FORCED-NEXT: store x86_fp80 [[TMP5]], ptr [[TMP3]], align 16
|
||||
; FORCED-NEXT: store x86_fp80 [[TMP6]], ptr [[TMP4]], align 16
|
||||
|
||||
@@ -241,15 +241,14 @@ define i32 @interleaved_access_forward(ptr %p, i64 %n) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX1]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX1]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX1]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX1]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1
|
||||
@@ -388,15 +387,14 @@ define i32 @interleaved_access_reverse(ptr %p, i64 %n) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP18]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[OFFSET_IDX]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP18]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[OFFSET_IDX]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 1
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP5]], i32 1
|
||||
@@ -541,11 +539,10 @@ define void @predicated_store(ptr %p, i32 %x, i64 %n) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P]], i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP3]], i32 0
|
||||
@@ -731,11 +728,10 @@ define void @irregular_type(ptr %a, i64 %n) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]]
|
||||
@@ -775,11 +771,10 @@ define void @irregular_type(ptr %a, i64 %n) {
|
||||
; INTER-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; INTER: [[VECTOR_BODY]]:
|
||||
; INTER-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; INTER-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
|
||||
; INTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX1]], 1
|
||||
; INTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 2
|
||||
; INTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 3
|
||||
; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX]]
|
||||
; INTER-NEXT: [[TMP0:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[INDEX1]]
|
||||
; INTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP1]]
|
||||
; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP5]]
|
||||
; INTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds x86_fp80, ptr [[A]], i64 [[TMP3]]
|
||||
@@ -944,11 +939,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX1]], 48
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]]
|
||||
@@ -1059,11 +1053,10 @@ define void @pointer_iv_non_uniform_0(ptr %a, i64 %n) {
|
||||
; INTER: [[VECTOR_BODY]]:
|
||||
; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
|
||||
; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
|
||||
; INTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX1]], 16
|
||||
; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 32
|
||||
; INTER-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX1]], 48
|
||||
; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
|
||||
; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
|
||||
; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
|
||||
; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]]
|
||||
@@ -1178,11 +1171,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
|
||||
@@ -1227,11 +1219,10 @@ define void @pointer_iv_non_uniform_1(ptr %a, i64 %n) {
|
||||
; INTER: [[VECTOR_BODY]]:
|
||||
; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; INTER-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX]], 16
|
||||
; INTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OFFSET_IDX1]], 0
|
||||
; INTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX1]], 16
|
||||
; INTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX1]], 32
|
||||
; INTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX1]], 48
|
||||
; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX1]]
|
||||
; INTER-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
|
||||
; INTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
|
||||
; INTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
|
||||
@@ -1466,11 +1457,10 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[INDEX1]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX1]], 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX1]], 2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX1]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
|
||||
@@ -1495,7 +1485,7 @@ define void @pointer_operand_geps_with_different_indexed_types(ptr %A, ptr %B, i
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i8> [[TMP27]], i8 [[TMP24]], i32 2
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP32]], i8 [[TMP25]], i32 3
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i8> [[TMP20]], [[TMP28]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX1]]
|
||||
; CHECK-NEXT: store <4 x i8> [[TMP29]], ptr [[TMP30]], align 1, !alias.scope [[META30:![0-9]+]], !noalias [[META27]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 4
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
|
||||
@@ -185,11 +185,10 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
|
||||
; CHECK-NEXT: store i64 [[TMP49]], ptr [[TMP48]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE28]]:
|
||||
; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 0
|
||||
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 1
|
||||
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 2
|
||||
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 3
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP50]], i32 0
|
||||
; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i32 0
|
||||
; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x ptr> [[TMP54]], ptr [[TMP51]], i32 1
|
||||
; CHECK-NEXT: [[TMP56:%.*]] = insertelement <4 x ptr> [[TMP55]], ptr [[TMP52]], i32 2
|
||||
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <4 x ptr> [[TMP56]], ptr [[TMP53]], i32 3
|
||||
@@ -203,7 +202,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = insertelement <4 x ptr> [[TMP64]], ptr [[TMP61]], i32 3
|
||||
; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF29]]:
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[TMP50]], align 1
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = load i8, ptr [[B]], align 1
|
||||
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i8> poison, i8 [[TMP66]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
|
||||
; CHECK: [[PRED_LOAD_CONTINUE30]]:
|
||||
@@ -270,7 +269,7 @@ define void @preserve_flags_narrowing_extends_and_truncs(ptr noalias %A, ptr noa
|
||||
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46:.*]]
|
||||
; CHECK: [[PRED_STORE_IF45]]:
|
||||
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i8> [[TMP100]], i32 0
|
||||
; CHECK-NEXT: store i8 [[TMP102]], ptr [[TMP50]], align 1
|
||||
; CHECK-NEXT: store i8 [[TMP102]], ptr [[B]], align 1
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE46]]:
|
||||
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF47:.*]], label %[[PRED_STORE_CONTINUE48:.*]]
|
||||
|
||||
@@ -173,11 +173,10 @@ define void @test_scalar_steps(ptr nocapture %a, ptr noalias %b, i64 %size) !dbg
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0, !dbg [[LOC8:!.+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2, !dbg [[LOC8:!.+]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4
|
||||
|
||||
@@ -454,11 +454,10 @@ define i64 @select_argmin_iv_not_canonical(i64 %num, ptr %src) {
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP4]]
|
||||
|
||||
@@ -325,11 +325,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[RDX_SELECT_CMP:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 48
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP7]]
|
||||
@@ -378,11 +377,10 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) {
|
||||
; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP43:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX13:%.*]] = mul i64 [[INDEX11]], 16
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[OFFSET_IDX13]], 0
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[OFFSET_IDX13]], 16
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[OFFSET_IDX13]], 32
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[OFFSET_IDX13]], 48
|
||||
; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]]
|
||||
; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX13]]
|
||||
; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
|
||||
; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP28]]
|
||||
; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP29]]
|
||||
|
||||
@@ -169,7 +169,6 @@ define i64 @findlast_iv_step2_interleave(ptr %a, i64 %n) {
|
||||
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP39:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -177,7 +176,7 @@ define i64 @findlast_iv_step2_interleave(ptr %a, i64 %n) {
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 10
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 14
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
|
||||
|
||||
@@ -312,11 +312,10 @@ define i64 @findlast_non_canonical_iv_with_expr(ptr %a, i64 %n) {
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 10, [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
|
||||
|
||||
@@ -15,7 +15,6 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3
|
||||
@@ -23,7 +22,7 @@ define i8 @recurrence_phi_with_same_incoming_values_after_simplifications(i8 %fo
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 5
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 7
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP4]]
|
||||
|
||||
@@ -20,13 +20,14 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1
|
||||
; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; VF2IC1: [[PRED_LOAD_IF]]:
|
||||
; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
|
||||
; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
|
||||
; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -45,7 +46,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; VF2IC1: [[PRED_STORE_IF]]:
|
||||
; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
|
||||
@@ -96,16 +97,19 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
|
||||
; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ]
|
||||
; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
|
||||
; VF2IC2-NEXT: [[TMP69:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> [[TMP69]], i64 [[TMP7]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; VF2IC2: [[PRED_LOAD_IF]]:
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
|
||||
; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0
|
||||
; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -143,7 +147,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; VF2IC2: [[PRED_STORE_IF]]:
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
|
||||
@@ -313,13 +317,14 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1
|
||||
; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; VF2IC1: [[PRED_LOAD_IF]]:
|
||||
; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
|
||||
; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
|
||||
; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -338,7 +343,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; VF2IC1: [[PRED_STORE_IF]]:
|
||||
; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
|
||||
@@ -385,16 +390,19 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
|
||||
; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ]
|
||||
; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
|
||||
; VF2IC2-NEXT: [[TMP65:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> [[TMP65]], i64 [[TMP7]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; VF2IC2: [[PRED_LOAD_IF]]:
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
|
||||
; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0
|
||||
; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -432,7 +440,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; VF2IC2: [[PRED_STORE_IF]]:
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
|
||||
@@ -595,13 +603,14 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; VF2IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; VF2IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; VF2IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2IC1-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2IC1-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP4]], i32 1
|
||||
; VF2IC1-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC1-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; VF2IC1-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; VF2IC1: [[PRED_LOAD_IF]]:
|
||||
; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP1]]
|
||||
; VF2IC1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VF2IC1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
|
||||
; VF2IC1-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; VF2IC1-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -620,7 +629,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC1-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; VF2IC1-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; VF2IC1: [[PRED_STORE_IF]]:
|
||||
; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VF2IC1-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
|
||||
; VF2IC1-NEXT: [[TMP18:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
|
||||
@@ -673,16 +682,19 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ]
|
||||
; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 33>, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ]
|
||||
; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[TMP1]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
|
||||
; VF2IC2-NEXT: [[TMP74:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP75:%.*]] = insertelement <2 x i64> [[TMP74]], i64 [[TMP7]], i32 1
|
||||
; VF2IC2-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
|
||||
; VF2IC2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; VF2IC2-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; VF2IC2: [[PRED_LOAD_IF]]:
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VF2IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
|
||||
; VF2IC2-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP23]], i32 0
|
||||
; VF2IC2-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -720,7 +732,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) {
|
||||
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; VF2IC2: [[PRED_STORE_IF]]:
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP26]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP31:%.*]] = extractelement <2 x i32> [[TMP15]], i32 0
|
||||
; VF2IC2-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP30]], [[TMP31]]
|
||||
|
||||
@@ -1041,8 +1041,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
|
||||
; UNROLL-NO-IC: vector.body:
|
||||
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
|
||||
@@ -1132,8 +1130,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
|
||||
; SINK-AFTER: vector.body:
|
||||
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
|
||||
@@ -1423,7 +1419,6 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
|
||||
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
|
||||
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600
|
||||
@@ -1431,7 +1426,7 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1000
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 1200
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1400
|
||||
; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; UNROLL-NO-IC-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; UNROLL-NO-IC-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; UNROLL-NO-IC-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; UNROLL-NO-IC-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -1595,11 +1590,10 @@ define i32 @PR33613(ptr %b, double %j, i32 %d, i32 %n) {
|
||||
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
|
||||
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
|
||||
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 200
|
||||
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 200
|
||||
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 400
|
||||
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 600
|
||||
; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
|
||||
; SINK-AFTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; SINK-AFTER-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
|
||||
; SINK-AFTER-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
|
||||
; SINK-AFTER-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -1898,7 +1892,6 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
|
||||
; UNROLL-NO-IC: vector.body:
|
||||
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1906,8 +1899,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
|
||||
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
|
||||
@@ -1942,7 +1935,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32>
|
||||
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4
|
||||
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4
|
||||
; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4
|
||||
@@ -2046,12 +2039,11 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
|
||||
; SINK-AFTER: vector.body:
|
||||
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
|
||||
; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]]
|
||||
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP0]], i64 1
|
||||
; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
|
||||
; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[INDEX]], i64 1
|
||||
; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1
|
||||
; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
|
||||
; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
|
||||
@@ -2068,7 +2060,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
|
||||
; SINK-AFTER-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
|
||||
; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32>
|
||||
; SINK-AFTER-NEXT: [[TMP21:%.*]] = mul nsw <4 x i32> [[TMP20]], [[TMP19]]
|
||||
; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
|
||||
; SINK-AFTER-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
|
||||
; SINK-AFTER-NEXT: store <4 x i32> [[TMP21]], ptr [[TMP22]], align 4
|
||||
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
@@ -2861,20 +2853,27 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
|
||||
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE29]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4)
|
||||
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
|
||||
; UNROLL-NO-IC-NEXT: [[TMP76:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP3]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP78:%.*]] = insertelement <4 x i32> [[TMP77]], i32 [[TMP4]], i32 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP79:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP5]], i32 3
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5
|
||||
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7
|
||||
; UNROLL-NO-IC-NEXT: [[TMP80:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP80]], i32 [[TMP7]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP82:%.*]] = insertelement <4 x i32> [[TMP81]], i32 [[TMP8]], i32 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP83:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP9]], i32 3
|
||||
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
|
||||
; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
|
||||
; UNROLL-NO-IC: pred.udiv.if:
|
||||
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
|
||||
; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]]
|
||||
; UNROLL-NO-IC: pred.udiv.continue:
|
||||
@@ -2944,7 +2943,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
|
||||
; UNROLL-NO-IC: pred.store.if:
|
||||
; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = add i32 [[INDEX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP49]]
|
||||
; UNROLL-NO-IC-NEXT: store i32 [[TMP2]], ptr [[TMP50]], align 4
|
||||
; UNROLL-NO-IC-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP50]], align 4
|
||||
; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; UNROLL-NO-IC: pred.store.continue:
|
||||
; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
|
||||
@@ -3096,15 +3095,18 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
|
||||
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE12]] ]
|
||||
; SINK-AFTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE12]] ]
|
||||
; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
|
||||
; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
|
||||
; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
|
||||
; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
|
||||
; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
|
||||
; SINK-AFTER-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> poison, i32 [[OFFSET_IDX]], i32 0
|
||||
; SINK-AFTER-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP3]], i32 1
|
||||
; SINK-AFTER-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP4]], i32 2
|
||||
; SINK-AFTER-NEXT: [[TMP43:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP5]], i32 3
|
||||
; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
|
||||
; SINK-AFTER-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
|
||||
; SINK-AFTER-NEXT: br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
|
||||
; SINK-AFTER: pred.udiv.if:
|
||||
; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
|
||||
; SINK-AFTER-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
|
||||
; SINK-AFTER-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
|
||||
; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE]]
|
||||
; SINK-AFTER: pred.udiv.continue:
|
||||
@@ -3140,7 +3142,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
|
||||
; SINK-AFTER: pred.store.if:
|
||||
; SINK-AFTER-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 0
|
||||
; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP26]]
|
||||
; SINK-AFTER-NEXT: store i32 [[TMP2]], ptr [[TMP27]], align 4
|
||||
; SINK-AFTER-NEXT: store i32 [[OFFSET_IDX]], ptr [[TMP27]], align 4
|
||||
; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]]
|
||||
; SINK-AFTER: pred.store.continue:
|
||||
; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
|
||||
|
||||
@@ -1725,6 +1725,712 @@ exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define void @fp_iv_used_in_gep_fadd(float %init, ptr noalias nocapture %A, float %fpinc, i32 %N) {
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_used_in_gep_fadd(
|
||||
; VEC4_INTERL1-NEXT: entry:
|
||||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
|
||||
; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT2]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[TMP5]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC4_INTERL1: vector.body:
|
||||
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]]
|
||||
; VEC4_INTERL1-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP7]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP9]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], 3.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP11]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32>
|
||||
; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i64 1
|
||||
; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP13]], i64 2
|
||||
; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP13]], i64 3
|
||||
; VEC4_INTERL1-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP18]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP20:%.*]] = sext i32 [[TMP15]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP22:%.*]] = sext i32 [[TMP16]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP17]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP24]]
|
||||
; VEC4_INTERL1-NEXT: store float [[OFFSET_IDX]], ptr [[TMP19]], align 4
|
||||
; VEC4_INTERL1-NEXT: store float [[TMP8]], ptr [[TMP21]], align 4
|
||||
; VEC4_INTERL1-NEXT: store float [[TMP10]], ptr [[TMP23]], align 4
|
||||
; VEC4_INTERL1-NEXT: store float [[TMP12]], ptr [[TMP25]], align 4
|
||||
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
||||
; VEC4_INTERL1: middle.block:
|
||||
; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
; VEC4_INTERL1: scalar.ph:
|
||||
; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL7:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC4_INTERL1: for.body:
|
||||
; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL7]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC4_INTERL1-NEXT: [[TMP27:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]]
|
||||
; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC4_INTERL1-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]]
|
||||
; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
|
||||
; VEC4_INTERL1: exit:
|
||||
; VEC4_INTERL1-NEXT: ret void
|
||||
;
|
||||
; VEC4_INTERL2-LABEL: @fp_iv_used_in_gep_fadd(
|
||||
; VEC4_INTERL2-NEXT: entry:
|
||||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC:%.*]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], splat (float 4.000000e+00)
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT2]], [[TMP6]]
|
||||
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC4_INTERL2: vector.body:
|
||||
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], [[TMP5]]
|
||||
; VEC4_INTERL2-NEXT: [[DOTCAST3:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST3]]
|
||||
; VEC4_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP7]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP9]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], 3.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP11]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP13]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fmul fast float [[FPINC]], 5.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP15]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fmul fast float [[FPINC]], 6.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP17]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fmul fast float [[FPINC]], 7.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP19]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32>
|
||||
; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i64 1
|
||||
; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i64 2
|
||||
; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i64 3
|
||||
; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fptoui <4 x float> [[STEP_ADD]] to <4 x i32>
|
||||
; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP26]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i64 1
|
||||
; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP26]], i64 2
|
||||
; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i64 3
|
||||
; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = sext i32 [[TMP22]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP31]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = sext i32 [[TMP23]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = sext i32 [[TMP24]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP37:%.*]] = sext i32 [[TMP25]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP37]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP39:%.*]] = sext i32 [[TMP27]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP39]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP41:%.*]] = sext i32 [[TMP28]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP43:%.*]] = sext i32 [[TMP29]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP45:%.*]] = sext i32 [[TMP30]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP45]]
|
||||
; VEC4_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP32]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP8]], ptr [[TMP34]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP10]], ptr [[TMP36]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP12]], ptr [[TMP38]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP14]], ptr [[TMP40]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP16]], ptr [[TMP42]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP18]], ptr [[TMP44]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP20]], ptr [[TMP46]], align 4
|
||||
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[STEP_ADD]], [[TMP5]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
||||
; VEC4_INTERL2: middle.block:
|
||||
; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
; VEC4_INTERL2: scalar.ph:
|
||||
; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC4_INTERL2: for.body:
|
||||
; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC4_INTERL2-NEXT: [[TMP48:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]]
|
||||
; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC4_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]]
|
||||
; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
|
||||
; VEC4_INTERL2: exit:
|
||||
; VEC4_INTERL2-NEXT: ret void
|
||||
;
|
||||
; VEC1_INTERL2-LABEL: @fp_iv_used_in_gep_fadd(
|
||||
; VEC1_INTERL2-NEXT: entry:
|
||||
; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
|
||||
; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC1_INTERL2: vector.ph:
|
||||
; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
|
||||
; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC1_INTERL2: vector.body:
|
||||
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
|
||||
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP5]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fptoui float [[OFFSET_IDX]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fptoui float [[TMP6]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = sext i32 [[TMP8]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
|
||||
; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP10]], align 4
|
||||
; VEC1_INTERL2-NEXT: store float [[TMP6]], ptr [[TMP12]], align 4
|
||||
; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
||||
; VEC1_INTERL2: middle.block:
|
||||
; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
; VEC1_INTERL2: scalar.ph:
|
||||
; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC1_INTERL2: for.body:
|
||||
; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]]
|
||||
; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC1_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]]
|
||||
; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
|
||||
; VEC1_INTERL2: exit:
|
||||
; VEC1_INTERL2-NEXT: ret void
|
||||
;
|
||||
; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_used_in_gep_fadd(
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: entry:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC2_INTERL1_PRED_STORE: vector.ph:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT2]], <float 0.000000e+00, float 1.000000e+00>
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[BROADCAST_SPLAT]], [[TMP5]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC2_INTERL1_PRED_STORE: vector.body:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP7]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fptoui <2 x float> [[VEC_IND]] to <2 x i32>
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP9]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP9]], i64 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = sext i32 [[TMP10]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP12]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = sext i32 [[TMP11]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[OFFSET_IDX]], ptr [[TMP13]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP8]], ptr [[TMP15]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
|
||||
; VEC2_INTERL1_PRED_STORE: middle.block:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[FOR_BODY]]
|
||||
; VEC2_INTERL1_PRED_STORE: for.body:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP17:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP17]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_05]], [[FPINC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
|
||||
; VEC2_INTERL1_PRED_STORE: exit:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%x.05 = phi float [ %init, %entry ], [ %add, %for.body ]
|
||||
%c = fptoui float %x.05 to i32
|
||||
%arrayidx = getelementptr inbounds float, ptr %A, i32 %c
|
||||
store float %x.05, ptr %arrayidx, align 4
|
||||
%add = fadd fast float %x.05, %fpinc
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %exit, label %for.body
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fp_iv_used_in_gep_fsub(float %init, ptr noalias nocapture %A, float %fpinc, i32 %N) {
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_used_in_gep_fsub(
|
||||
; VEC4_INTERL1-NEXT: entry:
|
||||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
|
||||
; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL1: vector.ph:
|
||||
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
|
||||
; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT2]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[TMP5]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC4_INTERL1: vector.body:
|
||||
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]]
|
||||
; VEC4_INTERL1-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP7]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], -2.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP9]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], -3.000000e+00
|
||||
; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP11]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32>
|
||||
; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i64 0
|
||||
; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i64 1
|
||||
; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP13]], i64 2
|
||||
; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP13]], i64 3
|
||||
; VEC4_INTERL1-NEXT: [[TMP18:%.*]] = sext i32 [[TMP14]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP18]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP20:%.*]] = sext i32 [[TMP15]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP22:%.*]] = sext i32 [[TMP16]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP17]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP24]]
|
||||
; VEC4_INTERL1-NEXT: store float [[OFFSET_IDX]], ptr [[TMP19]], align 4
|
||||
; VEC4_INTERL1-NEXT: store float [[TMP8]], ptr [[TMP21]], align 4
|
||||
; VEC4_INTERL1-NEXT: store float [[TMP10]], ptr [[TMP23]], align 4
|
||||
; VEC4_INTERL1-NEXT: store float [[TMP12]], ptr [[TMP25]], align 4
|
||||
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]]
|
||||
; VEC4_INTERL1-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; VEC4_INTERL1: middle.block:
|
||||
; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
; VEC4_INTERL1: scalar.ph:
|
||||
; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL7:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC4_INTERL1: for.body:
|
||||
; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL7]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC4_INTERL1-NEXT: [[TMP27:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]]
|
||||
; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC4_INTERL1-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
|
||||
; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
|
||||
; VEC4_INTERL1: exit:
|
||||
; VEC4_INTERL1-NEXT: ret void
|
||||
;
|
||||
; VEC4_INTERL2-LABEL: @fp_iv_used_in_gep_fsub(
|
||||
; VEC4_INTERL2-NEXT: entry:
|
||||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
|
||||
; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC4_INTERL2: vector.ph:
|
||||
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[FPINC:%.*]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], splat (float 4.000000e+00)
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer
|
||||
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
|
||||
; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT2]], [[TMP6]]
|
||||
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC4_INTERL2: vector.body:
|
||||
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[TMP5]]
|
||||
; VEC4_INTERL2-NEXT: [[DOTCAST3:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST3]]
|
||||
; VEC4_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP7]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fmul fast float [[FPINC]], -2.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP9]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fmul fast float [[FPINC]], -3.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP11]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP13]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fmul fast float [[FPINC]], 3.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP15]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
|
||||
; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = fsub fast float [[OFFSET_IDX]], [[TMP17]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = fptoui <4 x float> [[VEC_IND]] to <4 x i32>
|
||||
; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP21]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP21]], i64 1
|
||||
; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP21]], i64 2
|
||||
; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP21]], i64 3
|
||||
; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fptoui <4 x float> [[STEP_ADD]] to <4 x i32>
|
||||
; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP26]], i64 0
|
||||
; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i64 1
|
||||
; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP26]], i64 2
|
||||
; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP26]], i64 3
|
||||
; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = sext i32 [[TMP22]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP31]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = sext i32 [[TMP23]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP33]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = sext i32 [[TMP24]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP37:%.*]] = sext i32 [[TMP25]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP37]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP39:%.*]] = sext i32 [[TMP27]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP39]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP41:%.*]] = sext i32 [[TMP28]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP42:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP41]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP43:%.*]] = sext i32 [[TMP29]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP44:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP43]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP45:%.*]] = sext i32 [[TMP30]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[TMP46:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP45]]
|
||||
; VEC4_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP32]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP8]], ptr [[TMP34]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP10]], ptr [[TMP36]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP12]], ptr [[TMP38]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP14]], ptr [[TMP40]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP16]], ptr [[TMP42]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP18]], ptr [[TMP44]], align 4
|
||||
; VEC4_INTERL2-NEXT: store float [[TMP20]], ptr [[TMP46]], align 4
|
||||
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||
; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[TMP5]]
|
||||
; VEC4_INTERL2-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; VEC4_INTERL2: middle.block:
|
||||
; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
; VEC4_INTERL2: scalar.ph:
|
||||
; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC4_INTERL2: for.body:
|
||||
; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL5]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC4_INTERL2-NEXT: [[TMP48:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP48]]
|
||||
; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC4_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
|
||||
; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
|
||||
; VEC4_INTERL2: exit:
|
||||
; VEC4_INTERL2-NEXT: ret void
|
||||
;
|
||||
; VEC1_INTERL2-LABEL: @fp_iv_used_in_gep_fsub(
|
||||
; VEC1_INTERL2-NEXT: entry:
|
||||
; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
|
||||
; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC1_INTERL2: vector.ph:
|
||||
; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
|
||||
; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC1_INTERL2: vector.body:
|
||||
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
|
||||
; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fptoui float [[OFFSET_IDX]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fptoui float [[TMP6]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]]
|
||||
; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = sext i32 [[TMP8]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
|
||||
; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP10]], align 4
|
||||
; VEC1_INTERL2-NEXT: store float [[TMP6]], ptr [[TMP12]], align 4
|
||||
; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; VEC1_INTERL2: middle.block:
|
||||
; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
|
||||
; VEC1_INTERL2: scalar.ph:
|
||||
; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC1_INTERL2: for.body:
|
||||
; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]]
|
||||
; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC1_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
|
||||
; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
|
||||
; VEC1_INTERL2: exit:
|
||||
; VEC1_INTERL2-NEXT: ret void
|
||||
;
|
||||
; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_used_in_gep_fsub(
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: entry:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; VEC2_INTERL1_PRED_STORE: vector.ph:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC:%.*]], [[DOTCAST]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT2]], <float 0.000000e+00, float 1.000000e+00>
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[BROADCAST_SPLAT]], [[TMP5]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; VEC2_INTERL1_PRED_STORE: vector.body:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[FPINC]], [[DOTCAST5]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP7]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[FPINC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fptoui <2 x float> [[VEC_IND]] to <2 x i32>
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP9]], i64 0
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP9]], i64 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = sext i32 [[TMP10]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP12]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = sext i32 [[TMP11]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP14]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[OFFSET_IDX]], ptr [[TMP13]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP8]], ptr [[TMP15]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[BROADCAST_SPLAT4]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; VEC2_INTERL1_PRED_STORE: middle.block:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[FOR_BODY]]
|
||||
; VEC2_INTERL1_PRED_STORE: for.body:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[ENTRY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP17:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP17]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
|
||||
; VEC2_INTERL1_PRED_STORE: exit:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%x.05 = phi float [ %init, %entry ], [ %add, %for.body ]
|
||||
%c = fptoui float %x.05 to i32
|
||||
%arrayidx = getelementptr inbounds float, ptr %A, i32 %c
|
||||
store float %x.05, ptr %arrayidx, align 4
|
||||
%add = fsub fast float %x.05, %fpinc
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %exit, label %for.body
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @fp_iv_used_in_gep_fmul(float %init, ptr noalias nocapture %A, float %fpinc, i32 %N) {
|
||||
; VEC4_INTERL1-LABEL: @fp_iv_used_in_gep_fmul(
|
||||
; VEC4_INTERL1-NEXT: entry:
|
||||
; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC4_INTERL1: for.body:
|
||||
; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL1-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
|
||||
; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC4_INTERL1-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]]
|
||||
; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]]
|
||||
; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
|
||||
; VEC4_INTERL1: exit:
|
||||
; VEC4_INTERL1-NEXT: ret void
|
||||
;
|
||||
; VEC4_INTERL2-LABEL: @fp_iv_used_in_gep_fmul(
|
||||
; VEC4_INTERL2-NEXT: entry:
|
||||
; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC4_INTERL2: for.body:
|
||||
; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC4_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
|
||||
; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC4_INTERL2-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]]
|
||||
; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]]
|
||||
; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
|
||||
; VEC4_INTERL2: exit:
|
||||
; VEC4_INTERL2-NEXT: ret void
|
||||
;
|
||||
; VEC1_INTERL2-LABEL: @fp_iv_used_in_gep_fmul(
|
||||
; VEC1_INTERL2-NEXT: entry:
|
||||
; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC1_INTERL2: for.body:
|
||||
; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC1_INTERL2-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
|
||||
; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC1_INTERL2-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]]
|
||||
; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]]
|
||||
; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
|
||||
; VEC1_INTERL2: exit:
|
||||
; VEC1_INTERL2-NEXT: ret void
|
||||
;
|
||||
; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_used_in_gep_fmul(
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: entry:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[FOR_BODY:%.*]]
|
||||
; VEC2_INTERL1_PRED_STORE: for.body:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[INIT:%.*]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[C:%.*]] = fptoui float [[X_05]] to i32
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = sext i32 [[C]] to i64
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fmul fast float [[X_05]], [[FPINC:%.*]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N:%.*]], [[LFTR_WIDEIV]]
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR_BODY]]
|
||||
; VEC2_INTERL1_PRED_STORE: exit:
|
||||
; VEC2_INTERL1_PRED_STORE-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%x.05 = phi float [ %init, %entry ], [ %add, %for.body ]
|
||||
%c = fptoui float %x.05 to i32
|
||||
%arrayidx = getelementptr inbounds float, ptr %A, i32 %c
|
||||
store float %x.05, ptr %arrayidx, align 4
|
||||
%add = fmul fast float %x.05, %fpinc
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %N
|
||||
br i1 %exitcond, label %exit, label %for.body
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
!llvm.module.flags = !{!3}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1)
|
||||
|
||||
@@ -22,12 +22,11 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META3]]
|
||||
@@ -35,7 +34,7 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5)
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
|
||||
@@ -109,16 +108,17 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 99, ptr [[TMP10]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META15]], !noalias [[META17]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
|
||||
@@ -139,7 +139,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF12]]:
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
|
||||
@@ -155,7 +155,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK: [[PRED_LOAD_CONTINUE15]]:
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0
|
||||
@@ -239,15 +239,16 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 10, ptr [[TMP10]], align 4, !alias.scope [[META25:![0-9]+]], !noalias [[META27:![0-9]+]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
@@ -258,7 +259,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
|
||||
; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE20]]:
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META30:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META30]]
|
||||
@@ -266,7 +267,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5)
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0
|
||||
@@ -342,15 +343,16 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META35:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META38:![0-9]+]], !noalias [[META40:![0-9]+]]
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
; CHECK: [[PRED_LOAD_CONTINUE]]:
|
||||
@@ -361,14 +363,14 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p
|
||||
; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4, !alias.scope [[META38]], !noalias [[META40]]
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
|
||||
; CHECK: [[PRED_LOAD_CONTINUE11]]:
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP22]], i32 [[TMP25]], i32 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP32]], splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP16]], <2 x i32> [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP28]], i32 0
|
||||
@@ -455,15 +457,16 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4, !alias.scope [[META48:![0-9]+]], !noalias [[META50:![0-9]+]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
@@ -474,14 +477,14 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr %
|
||||
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE20]]:
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META53:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META53]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
|
||||
@@ -557,15 +560,16 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE7:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -580,7 +584,7 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
|
||||
; CHECK: [[PRED_LOAD_CONTINUE7]]:
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF6]] ]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP22]], <2 x i32> splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
|
||||
@@ -651,19 +655,20 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE15:.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META68:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 20, ptr [[TMP6]], align 4, !alias.scope [[META71:![0-9]+]], !noalias [[META73:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META71]], !noalias [[META73]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[MID]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP8]], align 4, !alias.scope [[META75:![0-9]+]], !noalias [[META68]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
@@ -680,7 +685,7 @@ define void @test_stores_not_sunk_aliasing_load_between(ptr %dst, ptr %mid, ptr
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]]
|
||||
; CHECK: [[PRED_STORE_IF12]]:
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 10, ptr [[TMP14]], align 4, !alias.scope [[META71]], !noalias [[META73]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE13]]:
|
||||
@@ -747,9 +752,8 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
|
||||
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP17]]
|
||||
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP22]], i64 152
|
||||
@@ -763,7 +767,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
|
||||
@@ -846,9 +850,10 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
|
||||
; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[GEP_FLAG:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i64 152
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP3]], i64 152
|
||||
@@ -866,7 +871,7 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[DST]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP18]], i32 0
|
||||
; CHECK-NEXT: store double [[TMP19]], ptr [[TMP20]], align 8, !alias.scope [[META88:![0-9]+]], !noalias [[META85]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
@@ -882,7 +887,7 @@ define void @sink_multiple_store_groups_alias_via_scev(ptr %dst, ptr %src) {
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
|
||||
; CHECK: [[PRED_STORE_IF4]]:
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr double, ptr [[DST]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: store double [[TMP13]], ptr [[TMP43]], align 8, !alias.scope [[META88]], !noalias [[META85]]
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, ptr [[TMP43]], i64 8
|
||||
; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP44]], align 8, !alias.scope [[META88]], !noalias [[META85]]
|
||||
@@ -1031,15 +1036,16 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META92:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META95:![0-9]+]], !noalias [[META92]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
@@ -1054,7 +1060,7 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
|
||||
; CHECK: [[PRED_STORE_IF4]]:
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META95]], !noalias [[META92]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE5]]:
|
||||
@@ -1069,7 +1075,7 @@ define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]]
|
||||
; CHECK: [[PRED_STORE_IF8]]:
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META95]], !noalias [[META92]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE9]]:
|
||||
@@ -1154,13 +1160,14 @@ define void @stores_never_sunk_past_alising_loads_or_stores(ptr %dst, ptr %src,
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x i32> [[TMP40]], i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META99:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1
|
||||
@@ -1200,7 +1207,7 @@ define void @stores_never_sunk_past_alising_loads_or_stores(ptr %dst, ptr %src,
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF14]]:
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope [[META102]], !noalias [[META104]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0
|
||||
; CHECK-NEXT: store i32 99, ptr [[TMP23]], align 4, !alias.scope [[META102]], !noalias [[META104]]
|
||||
@@ -1221,7 +1228,7 @@ define void @stores_never_sunk_past_alising_loads_or_stores(ptr %dst, ptr %src,
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]]
|
||||
; CHECK: [[PRED_STORE_IF18]]:
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP32]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 4, !alias.scope [[META106:![0-9]+]], !noalias [[META99]]
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]]
|
||||
@@ -1302,16 +1309,17 @@ define void @loads_never_hoisted_past_alising_stores(ptr %dst, ptr %src, ptr %co
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP33]], i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META109:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META112:![0-9]+]], !noalias [[META114:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
|
||||
; CHECK-NEXT: store i32 100, ptr [[TMP6]], align 4, !alias.scope [[META112]], !noalias [[META114]]
|
||||
@@ -1332,7 +1340,7 @@ define void @loads_never_hoisted_past_alising_stores(ptr %dst, ptr %src, ptr %co
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store i32 99, ptr [[TMP17]], align 4, !alias.scope [[META112]], !noalias [[META114]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META112]], !noalias [[META114]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
|
||||
@@ -1350,7 +1358,7 @@ define void @loads_never_hoisted_past_alising_stores(ptr %dst, ptr %src, ptr %co
|
||||
; CHECK: [[PRED_LOAD_CONTINUE13]]:
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP24]], %[[PRED_LOAD_IF12]] ]
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10)
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP26]], <2 x i32> [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[TMP29]], i32 0
|
||||
|
||||
@@ -22,11 +22,10 @@ define void @test(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]]
|
||||
@@ -35,7 +34,7 @@ define void @test(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10)
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]]
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -100,16 +99,17 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP20]], align 4
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -128,7 +128,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF10]]:
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP49]], align 4
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
|
||||
@@ -144,7 +144,7 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
|
||||
; CHECK: [[PRED_LOAD_CONTINUE13]]:
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP27]], %[[PRED_LOAD_IF12]] ]
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP33]], <2 x i32> [[TMP23]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -213,14 +213,13 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2)
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE17:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META14:![0-9]+]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META17:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
@@ -262,7 +261,7 @@ define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2)
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ [[TMP36]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP31]], %[[PRED_LOAD_IF16]] ]
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP19]], <2 x i32> [[TMP28]], <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[PREDPHI18:%.*]] = select <2 x i1> [[TMP37]], <2 x i32> [[TMP32]], <2 x i32> [[PREDPHI]]
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI18]], ptr [[TMP41]], align 4, !alias.scope [[META21:![0-9]+]], !noalias [[META23:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -332,13 +331,12 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META26:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
|
||||
@@ -377,7 +375,7 @@ define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = zext <2 x i8> [[TMP29]] to <2 x i32>
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP30]], <2 x i32> [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META33:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -441,11 +439,10 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39:![0-9]+]]
|
||||
@@ -454,7 +451,7 @@ define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10)
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP25]], <2 x i32> [[TMP26]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP35]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -581,12 +578,11 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]]
|
||||
@@ -594,7 +590,7 @@ define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10)
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -660,12 +656,11 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]]
|
||||
@@ -673,7 +668,7 @@ define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) {
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10)
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
@@ -784,16 +779,17 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10:.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP43]], align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP64]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -815,7 +811,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP32]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF3]]:
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
|
||||
@@ -835,7 +831,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF7]]:
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP61]], align 4
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]]
|
||||
@@ -852,7 +848,7 @@ define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %s
|
||||
; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP44]], %[[PRED_LOAD_IF9]] ]
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP32]], <2 x i32> [[TMP22]], <2 x i32> [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP45]], <2 x i32> [[PREDPHI]]
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 32
|
||||
; CHECK-NEXT: store <2 x i32> [[TMP42]], ptr [[TMP40]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
@@ -924,7 +920,6 @@ define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %co
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8
|
||||
@@ -984,7 +979,6 @@ define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %co
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8
|
||||
@@ -1042,13 +1036,12 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 20)
|
||||
@@ -1108,7 +1101,7 @@ define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noali
|
||||
; CHECK-NEXT: [[TMP42:%.*]] = phi <2 x i32> [ [[TMP38]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP41]], %[[PRED_LOAD_IF10]] ]
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]]
|
||||
; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
|
||||
|
||||
@@ -833,11 +833,10 @@ define void @sinkable_predicated_store(ptr %A, ptr %B) {
|
||||
; VEC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH:%.*]], label [[VECTOR_BODY:%.*]]
|
||||
; VEC: vector.body:
|
||||
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; VEC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
|
||||
; VEC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP1]]
|
||||
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
|
||||
; VEC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
|
||||
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8:![0-9]+]]
|
||||
; VEC-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], zeroinitializer
|
||||
; VEC-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1)
|
||||
|
||||
@@ -11,14 +11,12 @@ define void @multiple_iv_uses_in_same_instruction(ptr %ptr) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[TMP0]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR:%.*]], i64 0, i64 [[INDEX]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [100 x [100 x i32]], ptr [[PTR]], i64 0, i64 [[TMP1]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
|
||||
|
||||
@@ -22,9 +22,8 @@ define void @int_iv_based_on_pointer_iv(ptr %A) {
|
||||
; VF2: vector.body:
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
|
||||
; VF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
|
||||
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
|
||||
; VF2-NEXT: store i8 0, ptr [[TMP9]], align 1
|
||||
; VF2-NEXT: store i8 0, ptr [[TMP10]], align 1
|
||||
|
||||
@@ -863,15 +863,14 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP6]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4
|
||||
@@ -1065,11 +1064,10 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP3]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
|
||||
@@ -1081,7 +1079,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP10]], align 4
|
||||
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP3]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP5]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP6]]
|
||||
@@ -1237,9 +1235,8 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 8
|
||||
@@ -1391,11 +1388,10 @@ define void @scalarize_induction_variable_03(ptr %p, i32 %y, i64 %n) {
|
||||
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; UNROLL-NO-IC: vector.body:
|
||||
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[TMP0]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP1]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1
|
||||
@@ -1564,7 +1560,6 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
@@ -1573,7 +1568,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP26]], align 1, !alias.scope [[META17:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP15]], align 1, !alias.scope [[META17]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1
|
||||
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
|
||||
; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]]
|
||||
@@ -1774,7 +1769,6 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -1792,7 +1786,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 1, !alias.scope [[META17]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP22]], align 1, !alias.scope [[META17]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP38]], align 1, !alias.scope [[META17]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[TMP9]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP12]], i32 1
|
||||
@@ -2428,13 +2422,12 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1
|
||||
; CHECK-NEXT: store i16 [[TMP9]], ptr [[TMP7]], align 2
|
||||
; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP8]], align 2
|
||||
@@ -2590,7 +2583,6 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
|
||||
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
|
||||
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -2602,7 +2594,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16>
|
||||
; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0
|
||||
; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[TMP3]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP4]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP5]], i32 1
|
||||
; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], ptr [[P]], i64 [[TMP6]], i32 1
|
||||
|
||||
@@ -20,17 +20,16 @@ define void @merge_tbaa_interleave_group(ptr nocapture readonly %p, ptr noalias
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[TMP0]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_VEC4R:%.*]], ptr [[P]], i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA0:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP3]], align 8, !tbaa [[TBAA0]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], splat (double 2.000000e+00)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[TMP0]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP0]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [20 x %struct.Vec2r], ptr [[CP]], i64 0, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[INDEX]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_VEC4R]], ptr [[P]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8, !tbaa [[TBAA5:![0-9]+]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP11]], align 8, !tbaa [[TBAA5]]
|
||||
|
||||
@@ -1131,9 +1131,8 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
|
||||
; VEC: [[VECTOR_BODY]]:
|
||||
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
|
||||
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
|
||||
; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
|
||||
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]]
|
||||
; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
|
||||
; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
|
||||
; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
|
||||
|
||||
@@ -21,9 +21,10 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_LOAD_CONTINUE2]] ]
|
||||
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 -1000, [[DOTCAST]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[OFFSET_IDX]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE:%.*]], i16 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i1, ptr [[TEST_BASE]], i16 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i1, ptr [[TMP2]], align 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i1, ptr [[TMP3]], align 1
|
||||
@@ -31,7 +32,7 @@ define i8 @test_negative_off(i16 %len, ptr %test_base) {
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i1> [[TMP6]], i1 [[TMP5]], i32 1
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
|
||||
; CHECK: pred.load.if:
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[ALLOCA]], i16 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i8> poison, i8 [[TMP10]], i32 0
|
||||
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
|
||||
|
||||
@@ -1299,9 +1299,8 @@ define i16 @multiple_exit_none_via_latch(ptr %dst, i64 %x) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8
|
||||
; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8
|
||||
|
||||
@@ -17,9 +17,8 @@ define i16 @multiple_exit_one_with_constant_condition(ptr %dst, i64 %x) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: store i64 0, ptr [[TMP5]], align 8
|
||||
; CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8
|
||||
|
||||
@@ -14,9 +14,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr
|
||||
; VF4IC1: [[VECTOR_BODY]]:
|
||||
; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
|
||||
; VF4IC1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
|
||||
; VF4IC1-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
|
||||
; VF4IC1-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 2
|
||||
; VF4IC1-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 3
|
||||
; VF4IC1-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP5]]
|
||||
; VF4IC1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
|
||||
@@ -41,7 +38,6 @@ define void @narrow_select_to_single_scalar(i1 %invar.cond, ptr noalias %A, ptr
|
||||
; VF2IC2: [[VECTOR_BODY]]:
|
||||
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF2IC2-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
|
||||
; VF2IC2-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 2
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 3
|
||||
; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i16 [[TMP3]]
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 1
|
||||
@@ -86,7 +82,6 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
|
||||
; VF4IC1: [[VECTOR_BODY]]:
|
||||
; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; VF4IC1-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; VF4IC1-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
|
||||
; VF4IC1-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
|
||||
@@ -99,7 +94,7 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
|
||||
; VF4IC1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP7]]
|
||||
; VF4IC1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP9]]
|
||||
; VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP11]]
|
||||
; VF4IC1-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4
|
||||
; VF4IC1-NEXT: store i32 [[INDEX]], ptr [[TMP6]], align 4
|
||||
; VF4IC1-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4
|
||||
; VF4IC1-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4
|
||||
; VF4IC1-NEXT: store i32 [[TMP3]], ptr [[TMP12]], align 4
|
||||
@@ -120,11 +115,10 @@ define void @narrow_to_single_scalar_store_address_not_uniform_across_all_parts(
|
||||
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; VF2IC2: [[VECTOR_BODY]]:
|
||||
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; VF2IC2-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0
|
||||
; VF2IC2-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1
|
||||
; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 2
|
||||
; VF2IC2-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 3
|
||||
; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP7]], 1
|
||||
; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i32 [[INDEX]], 1
|
||||
; VF2IC2-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 1
|
||||
; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP2]]
|
||||
; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i32 [[TMP3]]
|
||||
|
||||
@@ -142,18 +142,17 @@ define void @assume_loop_variant_operand_bundle(ptr noalias %a, ptr noalias %b)
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
|
||||
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP0]]) ]
|
||||
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[INDEX]]) ]
|
||||
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP1]]) ]
|
||||
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP2]]) ]
|
||||
; CHECK-NEXT: tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP3]]) ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP10]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
|
||||
|
||||
@@ -793,7 +793,6 @@ define void @multiple_ivs_wide(ptr %dst) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 -64, i32 -62, i32 -60, i32 -58>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 6
|
||||
@@ -802,7 +801,7 @@ define void @multiple_ivs_wide(ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
|
||||
@@ -829,7 +828,6 @@ define void @multiple_ivs_wide(ptr %dst) {
|
||||
; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX1]], 2
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 6
|
||||
@@ -838,7 +836,7 @@ define void @multiple_ivs_wide(ptr %dst) {
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2
|
||||
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP17]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP18]]
|
||||
|
||||
@@ -15,22 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP4]]
|
||||
|
||||
@@ -25,11 +25,10 @@ define void @a(ptr readnone %b) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]]
|
||||
@@ -535,15 +534,13 @@ define i64 @ivopt_widen_ptr_indvar_2(ptr noalias %a, i64 %stride, i64 %n) {
|
||||
; STRIDED: vector.body:
|
||||
; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
|
||||
; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 0, [[TMP1]]
|
||||
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]]
|
||||
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP1]]
|
||||
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]]
|
||||
; STRIDED-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP1]]
|
||||
; STRIDED-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], [[TMP12]]
|
||||
; STRIDED-NEXT: [[TMP14:%.*]] = mul i64 3, [[TMP1]]
|
||||
; STRIDED-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], [[TMP14]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[OFFSET_IDX]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP13]]
|
||||
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP15]]
|
||||
|
||||
@@ -18,9 +18,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; IC1-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; IC1: [[VECTOR_BODY]]:
|
||||
; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE13:.*]] ]
|
||||
; IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; IC1-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
|
||||
; IC1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
|
||||
; IC1-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
|
||||
; IC1-NEXT: [[TMP12:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
|
||||
; IC1-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> [[TMP12]], ptr [[NEXT_GEP3]], i32 1
|
||||
@@ -113,11 +112,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
|
||||
; IC2-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; IC2: [[VECTOR_BODY]]:
|
||||
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ]
|
||||
; IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2
|
||||
; IC2-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3
|
||||
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]]
|
||||
; IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
|
||||
; IC2-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
|
||||
; IC2-NEXT: [[TMP23:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0
|
||||
; IC2-NEXT: [[TMP24:%.*]] = insertelement <2 x ptr> [[TMP23]], ptr [[NEXT_GEP3]], i32 1
|
||||
|
||||
@@ -2264,11 +2264,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP2]], i32 2
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
|
||||
@@ -2284,7 +2287,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -2342,15 +2345,22 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
|
||||
; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP94:%.*]], %[[PRED_LOAD_CONTINUE15]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP98:%.*]], %[[PRED_LOAD_CONTINUE15]] ]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP1]], i32 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP2]], i32 2
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP3]], i32 3
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 4
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i32 0
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP5]], i32 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP6]], i32 2
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP7]], i32 3
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[INDEX]], i32 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
|
||||
@@ -2379,7 +2389,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
|
||||
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]:
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[INDEX]]
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
|
||||
; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0
|
||||
; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
|
||||
@@ -329,11 +329,10 @@ define void @reduc_store_inside_unrolled(ptr %dst, ptr readonly %src) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
|
||||
@@ -535,11 +534,10 @@ define void @reduc_store_middle_store_predicated(ptr %dst, ptr readonly %src) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
|
||||
|
||||
@@ -13,8 +13,9 @@ define void @test_sdiv_variant_divisor_induction(ptr noalias %a, ptr noalias %c)
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[OFFSET_IDX]], i32 0
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
|
||||
@@ -22,7 +23,7 @@ define void @test_sdiv_variant_divisor_induction(ptr noalias %a, ptr noalias %c)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -42,7 +43,7 @@ define void @test_sdiv_variant_divisor_induction(ptr noalias %a, ptr noalias %c)
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
@@ -93,16 +94,17 @@ define void @test_sdiv_variant_divisor_load(ptr noalias %a, ptr noalias %b, ptr
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i64> [[TMP30]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -127,7 +129,7 @@ define void @test_sdiv_variant_divisor_load(ptr noalias %a, ptr noalias %b, ptr
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP22]], i32 0
|
||||
; CHECK-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
@@ -182,13 +184,14 @@ define void @test_sdiv_invariant_divisor_nonconst(ptr noalias %a, i64 %b, ptr no
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -208,7 +211,7 @@ define void @test_sdiv_invariant_divisor_nonconst(ptr noalias %a, i64 %b, ptr no
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
@@ -259,13 +262,14 @@ define void @test_sdiv_invariant_divisor_minusone(ptr noalias %a, ptr noalias %c
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i64> [[TMP22]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -285,7 +289,7 @@ define void @test_sdiv_invariant_divisor_minusone(ptr noalias %a, ptr noalias %c
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
@@ -336,13 +340,14 @@ define void @test_sdiv_invariant_divisor_safeimm(ptr noalias %a, ptr noalias %c)
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE4]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP1]], i32 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -361,7 +366,7 @@ define void @test_sdiv_invariant_divisor_safeimm(ptr noalias %a, ptr noalias %c)
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; CHECK-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
@@ -475,13 +480,14 @@ define void @test_sdiv_both_invariant_nonconst(ptr noalias %a, i64 %b, i64 %b2,
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i32 0
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i16> [[VEC_IND]], splat (i16 1024)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
@@ -502,7 +508,7 @@ define void @test_sdiv_both_invariant_nonconst(ptr noalias %a, i64 %b, i64 %b2,
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; CHECK-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
|
||||
@@ -97,11 +97,10 @@ define void @blend_chain_iv(i1 %c) {
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP7]]
|
||||
@@ -155,17 +154,16 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) {
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 2
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 3
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> poison, ptr [[A]], i32 0
|
||||
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP6]], i32 1
|
||||
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x ptr> [[TMP36]], ptr [[TMP7]], i32 2
|
||||
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP37]], ptr [[TMP8]], i32 3
|
||||
; CHECK-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_LOAD_IF]]:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0
|
||||
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
||||
; CHECK: [[PRED_LOAD_CONTINUE]]:
|
||||
@@ -196,7 +194,7 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) {
|
||||
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
|
||||
; CHECK: [[PRED_STORE_IF]]:
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP26]], i32 0
|
||||
; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP5]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP28]], ptr [[A]], align 4
|
||||
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
|
||||
; CHECK: [[PRED_STORE_CONTINUE]]:
|
||||
; CHECK-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
|
||||
|
||||
@@ -149,7 +149,6 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -163,7 +162,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -204,15 +203,14 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
|
||||
@@ -253,7 +251,6 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -267,7 +264,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -309,7 +306,6 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -323,7 +319,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -364,7 +360,6 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -378,7 +373,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -418,15 +413,14 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP0]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
|
||||
@@ -606,7 +600,6 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -620,7 +613,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -661,15 +654,14 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
|
||||
@@ -710,7 +702,6 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -724,7 +715,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -766,7 +757,6 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -780,7 +770,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -822,7 +812,6 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -836,7 +825,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -877,15 +866,14 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP1]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
|
||||
@@ -924,7 +912,6 @@ define void @test_step_is_not_invariant(ptr %A) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16>
|
||||
@@ -934,7 +921,7 @@ define void @test_step_is_not_invariant(ptr %A) {
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP8]]
|
||||
; CHECK-NEXT: store i16 [[TMP1]], ptr [[TMP9]], align 2
|
||||
; CHECK-NEXT: store i16 [[TMP0]], ptr [[TMP9]], align 2
|
||||
; CHECK-NEXT: store i16 [[TMP2]], ptr [[TMP10]], align 2
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <2 x i32> [[VEC_IND]], splat (i32 2)
|
||||
|
||||
@@ -147,9 +147,8 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
|
||||
@@ -158,7 +157,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP7]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP12]], align 8
|
||||
@@ -198,13 +197,12 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[OFFSET_IDX]], 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 42
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
|
||||
@@ -244,9 +242,8 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 8
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
|
||||
@@ -255,7 +252,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP7]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i32 0
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP8]], i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP12]], align 8
|
||||
@@ -295,7 +292,6 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -309,7 +305,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -400,7 +396,6 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -414,7 +409,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -456,7 +451,6 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -470,7 +464,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -512,7 +506,6 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -526,7 +519,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
|
||||
@@ -283,7 +283,6 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
|
||||
@@ -291,7 +290,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[SRC_A]], i64 [[TMP3]]
|
||||
@@ -348,7 +347,7 @@ define void @ld_div2_ld_scevunknown_nonuniform(ptr %src.a, ptr noalias %src.b, p
|
||||
; CHECK-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP61]], i32 [[TMP54]], i32 5
|
||||
; CHECK-NEXT: [[TMP63:%.*]] = insertelement <8 x i32> [[TMP62]], i32 [[TMP55]], i32 6
|
||||
; CHECK-NEXT: [[TMP64:%.*]] = insertelement <8 x i32> [[TMP63]], i32 [[TMP56]], i32 7
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: store <8 x i32> [[TMP64]], ptr [[TMP65]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||
; CHECK-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
|
||||
|
||||
@@ -229,7 +229,6 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -243,7 +242,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -266,7 +265,6 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -292,7 +290,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
|
||||
; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -337,15 +335,14 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2: vector.body:
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 1
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[OFFSET_IDX]], 1
|
||||
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
||||
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
|
||||
; VF2-NEXT: [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
|
||||
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
|
||||
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP5]], ptr [[TMP7]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
|
||||
@@ -366,11 +363,10 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4: vector.body:
|
||||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0]], 1
|
||||
; VF4-NEXT: [[TMP4:%.*]] = lshr i64 [[OFFSET_IDX]], 1
|
||||
; VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
|
||||
; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
|
||||
; VF4-NEXT: [[TMP6:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
@@ -378,7 +374,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP6]], i32 1
|
||||
; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP6]], i32 2
|
||||
; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP6]], i32 3
|
||||
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -423,7 +419,6 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -437,7 +432,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -459,7 +454,6 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -485,7 +479,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
|
||||
; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -530,7 +524,6 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
|
||||
@@ -544,7 +537,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
|
||||
; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
|
||||
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
@@ -566,7 +559,6 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -592,7 +584,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP21]], i32 1
|
||||
; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP21]], i32 2
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -725,15 +717,14 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP1]], 1
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr i64 [[OFFSET_IDX]], 1
|
||||
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
|
||||
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
|
||||
; VF2-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
|
||||
; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
|
||||
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
|
||||
@@ -754,11 +745,10 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP1]], 1
|
||||
; VF4-NEXT: [[TMP5:%.*]] = lshr i64 [[OFFSET_IDX]], 1
|
||||
; VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
|
||||
; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
|
||||
; VF4-NEXT: [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
|
||||
@@ -766,7 +756,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
|
||||
; VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
|
||||
; VF4-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
|
||||
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -811,7 +801,6 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -825,7 +814,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -848,7 +837,6 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -874,7 +862,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -920,7 +908,6 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2)
|
||||
; VF2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
|
||||
@@ -934,7 +921,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
@@ -957,7 +944,6 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 4, i64 7, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -983,7 +969,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP22]], i32 1
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP22]], i32 2
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
|
||||
@@ -304,7 +304,6 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
|
||||
@@ -320,7 +319,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
@@ -345,7 +344,6 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -373,7 +371,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -425,7 +423,6 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
|
||||
@@ -441,7 +438,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
@@ -466,7 +463,6 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -494,7 +490,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -546,7 +542,6 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
|
||||
; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
|
||||
@@ -562,7 +557,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
@@ -587,7 +582,6 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 2, i64 4, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -615,7 +609,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -667,7 +661,6 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
|
||||
@@ -683,7 +676,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
@@ -707,7 +700,6 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -735,7 +727,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -786,7 +778,6 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
|
||||
@@ -802,7 +793,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
@@ -826,7 +817,6 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -854,7 +844,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -905,7 +895,6 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
|
||||
; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
|
||||
@@ -921,7 +910,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
@@ -945,7 +934,6 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 3, i64 6, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -973,7 +961,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP23]], i32 1
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP23]], i32 2
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
|
||||
; VF4-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
@@ -1322,7 +1310,6 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
|
||||
@@ -1338,7 +1325,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
|
||||
@@ -1363,7 +1350,6 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -1391,7 +1377,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -1443,7 +1429,6 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
|
||||
@@ -1459,7 +1444,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
|
||||
@@ -1484,7 +1469,6 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -1512,7 +1496,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -1564,7 +1548,6 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
|
||||
; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
|
||||
@@ -1580,7 +1563,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
|
||||
@@ -1605,7 +1588,6 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 2
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
@@ -1633,7 +1615,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -1685,7 +1667,6 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer
|
||||
; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], zeroinitializer
|
||||
@@ -1701,7 +1682,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
|
||||
@@ -1726,7 +1707,6 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -1754,7 +1734,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -1806,7 +1786,6 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
|
||||
; VF2-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[VEC_IND1]], splat (i64 1)
|
||||
@@ -1822,7 +1801,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
|
||||
@@ -1847,7 +1826,6 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -1875,7 +1853,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
@@ -1927,7 +1905,6 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[VEC_IND1:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF2-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF2-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF2-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF2-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF2-NEXT: [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
|
||||
; VF2-NEXT: [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND1]], splat (i64 3)
|
||||
@@ -1943,7 +1920,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF2-NEXT: [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
|
||||
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
|
||||
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF2-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
|
||||
; VF2-NEXT: store i64 [[TMP16]], ptr [[TMP18]], align 8
|
||||
@@ -1968,7 +1945,6 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
|
||||
; VF4-NEXT: [[TMP0:%.*]] = mul i64 [[INDEX]], 3
|
||||
; VF4-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
|
||||
; VF4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
|
||||
; VF4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
|
||||
; VF4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
|
||||
; VF4-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
|
||||
@@ -1996,7 +1972,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
|
||||
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP24]], i32 1
|
||||
; VF4-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP24]], i32 2
|
||||
; VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
|
||||
; VF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
|
||||
; VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
|
||||
; VF4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
|
||||
; VF4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
||||
|
||||
@@ -107,11 +107,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
|
||||
; FORCED-TF-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; FORCED-TF: vector.body:
|
||||
; FORCED-TF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ]
|
||||
; FORCED-TF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; FORCED-TF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; FORCED-TF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
|
||||
; FORCED-TF-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
|
||||
; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
|
||||
; FORCED-TF-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[INDEX]]
|
||||
; FORCED-TF-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
|
||||
; FORCED-TF-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
|
||||
; FORCED-TF-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
|
||||
@@ -179,11 +178,10 @@ define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE11:%.*]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
|
||||
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP1]]
|
||||
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP2]]
|
||||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP3]]
|
||||
|
||||
@@ -112,9 +112,10 @@ for.end:
|
||||
; VF8: vector.body:
|
||||
; VF8-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
|
||||
; VF8-NEXT: [[OFFSET_IDX:%.+]] = mul i64 [[INDEX]], %step
|
||||
; VF8-NEXT: [[MUL0:%.+]] = mul i64 0, %step
|
||||
; VF8-NEXT: [[ADD:%.+]] = add i64 [[OFFSET_IDX]], [[MUL0]]
|
||||
; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD]]
|
||||
; VF8-NEXT: [[MUL1:%.+]] = mul i64 1, %step
|
||||
; VF8-NEXT: [[ADD1:%.+]] = add i64 [[OFFSET_IDX]], [[MUL1]]
|
||||
; VF8: getelementptr inbounds i32, ptr %in, i64 [[OFFSET_IDX]]
|
||||
; VF8: getelementptr inbounds i32, ptr %in, i64 [[ADD1]]
|
||||
; VF8: middle.block:
|
||||
|
||||
; VF1-LABEL: @doit2
|
||||
|
||||
@@ -141,9 +141,8 @@ define void @stride_poison(ptr %dst) mustprogress {
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], poison
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], poison
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], poison
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP4]]
|
||||
; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 1
|
||||
; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 1
|
||||
|
||||
@@ -157,11 +157,10 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 3
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
|
||||
@@ -235,11 +234,10 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1,
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
|
||||
; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = trunc i64 [[INDEX]] to i32
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX2]], 0
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX2]], 1
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX2]], 2
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX2]], 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[OFFSET_IDX2]]
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP5]]
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST_1]], i32 [[TMP7]]
|
||||
|
||||
Reference in New Issue
Block a user