[LV] Remove selectUserVF, remove use of legacy expectedCost (NFCI) (#191216)

selectUserVectorizationFactor was still using legacy expectedCost.
Instead of going through the legacy cost model to check if the cost is
valid, directly go through the VPlan cost model.

This requires first building the plans and then checking their costs.

This removes another use of the legacy cost model.

PR: https://github.com/llvm/llvm-project/pull/191216
This commit is contained in:
Florian Hahn
2026-04-20 20:13:32 +01:00
committed by GitHub
parent d61f12f3e5
commit 1fbdf8afcb
16 changed files with 547 additions and 720 deletions

View File

@@ -885,13 +885,6 @@ public:
/// otherwise.
bool runtimeChecksRequired();
/// Setup cost-based decisions for user vectorization factor.
/// \return true if the UserVF is a feasible VF to be chosen.
bool selectUserVectorizationFactor(ElementCount UserVF) {
collectNonVectorizedAndSetWideningDecisions(UserVF);
return expectedCost(UserVF).isValid();
}
/// \return True if maximizing vector bandwidth is enabled by the target or
/// user options, for the given register kind.
bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind);
@@ -4983,15 +4976,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
InstructionCost Cost;
// If the vector loop gets executed exactly once with the given VF, ignore the
// costs of comparison and induction instructions, as they'll get simplified
// away.
SmallPtrSet<Instruction *, 2> ValuesToIgnoreForVF;
auto TC = getSmallConstantTripCount(PSE.getSE(), TheLoop);
if (TC == VF && !foldTailByMasking())
addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(),
ValuesToIgnoreForVF);
assert(VF.isScalar() && "must only be called for scalar VFs");
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
@@ -5000,43 +4985,26 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
// For each instruction in the old loop.
for (Instruction &I : *BB) {
// Skip ignored values.
if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) ||
if (ValuesToIgnore.count(&I) ||
(VF.isVector() && VecValuesToIgnore.count(&I)))
continue;
InstructionCost C = getInstructionCost(&I, VF);
// Check if we should override the cost.
if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) {
// For interleave groups, use ForceTargetInstructionCost once for the
// whole group.
if (VF.isVector() && getWideningDecision(&I, VF) == CM_Interleave) {
if (getInterleavedAccessGroup(&I)->getInsertPos() == &I)
C = InstructionCost(ForceTargetInstructionCost);
else
C = InstructionCost(0);
} else {
C = InstructionCost(ForceTargetInstructionCost);
}
}
if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0)
C = InstructionCost(ForceTargetInstructionCost);
BlockCost += C;
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF "
<< VF << " For instruction: " << I << '\n');
}
// If we are vectorizing a predicated block, it will have been
// if-converted. This means that the block's instructions (aside from
// stores and instructions that may divide by zero) will now be
// unconditionally executed. For the scalar case, we may not always execute
// the predicated block, if it is an if-else block. Thus, scale the block's
// cost by the probability of executing it.
// getPredBlockCostDivisor will return 1 for blocks that are only predicated
// by the header mask when folding the tail.
if (VF.isScalar())
BlockCost /= getPredBlockCostDivisor(CostKind, BB);
Cost += BlockCost;
// In the scalar loop, we may not always execute the predicated block, if it
// is an if-else block. Thus, scale the block's cost by the probability of
// executing it. getPredBlockCostDivisor will return 1 for blocks that are
// only predicated by the header mask when folding the tail.
Cost += BlockCost / getPredBlockCostDivisor(CostKind, BB);
}
return Cost;
@@ -6636,20 +6604,26 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
// Collect the instructions (and their associated costs) that will be more
// profitable to scalarize.
CM.collectInLoopReductions();
if (CM.selectUserVectorizationFactor(UserVF)) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
ElementCount EpilogueUserVF =
ElementCount::getFixed(EpilogueVectorizationForceVF);
if (EpilogueUserVF.isVector() &&
ElementCount::isKnownLT(EpilogueUserVF, UserVF) &&
CM.selectUserVectorizationFactor(EpilogueUserVF)) {
// Build a separate plan for the forced epilogue VF.
buildVPlansWithVPRecipes(EpilogueUserVF, EpilogueUserVF);
}
buildVPlansWithVPRecipes(UserVF, UserVF);
LLVM_DEBUG(printPlans(dbgs()));
return;
CM.collectNonVectorizedAndSetWideningDecisions(UserVF);
ElementCount EpilogueUserVF =
ElementCount::getFixed(EpilogueVectorizationForceVF);
if (EpilogueUserVF.isVector() &&
ElementCount::isKnownLT(EpilogueUserVF, UserVF)) {
CM.collectNonVectorizedAndSetWideningDecisions(EpilogueUserVF);
buildVPlansWithVPRecipes(EpilogueUserVF, EpilogueUserVF);
}
buildVPlansWithVPRecipes(UserVF, UserVF);
if (!VPlans.empty() && VPlans.back()->getSingleVF() == UserVF) {
// For scalar VF, skip VPlan cost check as VPlan cost is designed for
// vector VFs only.
if (UserVF.isScalar() ||
cost(*VPlans.back(), UserVF, /*RU=*/nullptr).isValid()) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
LLVM_DEBUG(printPlans(dbgs()));
return;
}
}
VPlans.clear();
reportVectorizationInfo("UserVF ignored because of invalid costs.",
"InvalidCost", ORE, OrigLoop);
}
@@ -6864,7 +6838,7 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF,
Cost += Plan.cost(VF, CostCtx);
// Add the cost of spills due to excess register usage
if (CM.shouldConsiderRegPressureForVF(VF))
if (RU && CM.shouldConsiderRegPressureForVF(VF))
Cost += RU->spillCost(CostCtx, ForceTargetNumVectorRegs);
#ifndef NDEBUG

View File

@@ -7,8 +7,8 @@ target triple = "aarch64--linux-gnu"
%pair = type { i8, i8 }
; CHECK-LABEL: test
; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8
; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8
; CHECK: Cost of 8 for VF 2: REPLICATE ir<%tmp1> = load ir<%tmp0>
; CHECK: Cost of 8 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp2>
; CHECK-LABEL: entry:
; CHECK-LABEL: vector.body:
; CHECK: [[LOAD1:%.*]] = load i8

View File

@@ -13,15 +13,11 @@ entry:
br label %for.body
; VF_8-LABEL: Checking a loop in 'i8_factor_2'
; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i8_factor_2'
; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1
; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
@@ -44,20 +40,14 @@ entry:
br label %for.body
; VF_4-LABEL: Checking a loop in 'i16_factor_2'
; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_8-LABEL: Checking a loop in 'i16_factor_2'
; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i16_factor_2'
; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
@@ -80,25 +70,17 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i32_factor_2'
; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_4-LABEL: Checking a loop in 'i32_factor_2'
; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_8-LABEL: Checking a loop in 'i32_factor_2'
; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i32_factor_2'
; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
@@ -121,25 +103,17 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i64_factor_2'
; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_2: Cost of 1 for VF 2: WIDEN ir<%tmp2> = load ir<%tmp0>
; VF_2-NEXT: Cost of 1 for VF 2: WIDEN store ir<%tmp0>, ir<%tmp2>
; VF_4-LABEL: Checking a loop in 'i64_factor_2'
; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8
; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_8-LABEL: Checking a loop in 'i64_factor_2'
; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8
; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i64_factor_2'
; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8
; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
@@ -168,10 +142,10 @@ entry:
; gaps.
;
; VF_2-LABEL: Checking a loop in 'i64_factor_8'
; VF_2: Found an estimated cost of 8 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_2: Cost of 8 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0>
; VF_2-NEXT: Cost of 8 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1>
; VF_2-NEXT: Cost of 8 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0>
; VF_2-NEXT: Cost of 8 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2

View File

@@ -46,7 +46,7 @@ for.inc:
; CHECK-COST: Checking a loop in 'scalable'
; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, ptr %arrayidx1, align 4
; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN store vp<{{.+}}>, ir<2>, ir<{{.+}}>
define void @scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-LABEL: @scalable(

View File

@@ -6,8 +6,8 @@
target triple="aarch64-unknown-linux-gnu"
; CHECK-VF4: Found an estimated cost of 14 for VF 4 For instruction: %add = fadd float %0, %sum.07
; CHECK-VF8: Found an estimated cost of 28 for VF 8 For instruction: %add = fadd float %0, %sum.07
; CHECK-VF4: Cost of 14 for VF 4: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>)
; CHECK-VF8: Cost of 28 for VF 8: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>)
define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) {
entry:
@@ -28,8 +28,8 @@ for.end:
}
; CHECK-VF4: Found an estimated cost of 12 for VF 4 For instruction: %add = fadd double %0, %sum.07
; CHECK-VF8: Found an estimated cost of 24 for VF 8 For instruction: %add = fadd double %0, %sum.07
; CHECK-VF4: Cost of 12 for VF 4: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>)
; CHECK-VF8: Cost of 24 for VF 8: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>)
define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) {
entry:
@@ -49,8 +49,8 @@ for.end:
ret double %add
}
; CHECK-VF4: Found an estimated cost of 16 for VF 4 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
; CHECK-VF8: Found an estimated cost of 32 for VF 8 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07)
; CHECK-VF4: Cost of 14 for VF 4: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>)
; CHECK-VF8: Cost of 28 for VF 8: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>)
define float @fmuladd_strict32(ptr %a, ptr %b, i64 %n) {
entry:
@@ -73,8 +73,8 @@ for.end:
}
; CHECK-VF4: Found an estimated cost of 16 for VF 4 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07)
; CHECK-VF8: Found an estimated cost of 32 for VF 8 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07)
; CHECK-VF4: Cost of 12 for VF 4: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>)
; CHECK-VF8: Cost of 24 for VF 8: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>)
define double @fmuladd_strict64(ptr %a, ptr %b, i64 %n) {
entry:

View File

@@ -5,7 +5,7 @@
target triple="aarch64--linux-gnu"
; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index'
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx3, align 4
; CHECK: Cost of 81 for VF vscale x 4: WIDEN ir<%1> = load ir<%arrayidx3>
define void @gather_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
entry:
br label %for.body
@@ -27,7 +27,7 @@ for.cond.cleanup:
}
; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index'
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, ptr %arrayidx5, align 4
; CHECK: Cost of 81 for VF vscale x 4: WIDEN store ir<%arrayidx5>, ir<%1>
define void @scatter_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 {
entry:
br label %for.body
@@ -51,7 +51,7 @@ for.cond.cleanup:
; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks
; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1.
; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride'
; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
; CHECK: Cost of 1 for VF vscale x 4: WIDEN ir<%0> = load vp<{{.+}}>
define void @gather_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 {
entry:
br label %for.body
@@ -74,7 +74,7 @@ for.cond.cleanup:
; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks
; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and cost is 1.
; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride'
; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4
; CHECK: Cost of 1 for VF vscale x 4: WIDEN store vp<{{.+}}>, ir<%0>
define void @scatter_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 {
entry:
br label %for.body
@@ -95,7 +95,7 @@ for.cond.cleanup:
}
; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2'
; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx>
define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
entry:
br label %for.body
@@ -116,7 +116,7 @@ for.cond.cleanup:
}
; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2'
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4
; CHECK: Cost of 81 for VF vscale x 4: WIDEN store ir<%arrayidx2>, ir<%0>
define void @scatter_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
entry:
br label %for.body
@@ -138,7 +138,7 @@ for.cond.cleanup:
; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64'
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
; CHECK: Cost of 81 for VF vscale x 4: WIDEN ir<%0> = load ir<%arrayidx>
define void @gather_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
entry:
br label %for.body
@@ -159,7 +159,7 @@ for.cond.cleanup:
}
; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64'
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4
; CHECK: Cost of 81 for VF vscale x 4: WIDEN store ir<%arrayidx2>, ir<%0>
define void @scatter_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
entry:
br label %for.body

View File

@@ -5,8 +5,8 @@ target triple = "aarch64-linux-gnu"
define void @loop_sve_i1(ptr nocapture %ptr, i64 %N) {
; CHECK-LABEL: LV: Checking a loop in 'loop_sve_i1'
; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %0 = load i1, ptr %arrayidx, align 16
; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store i1 %add, ptr %arrayidx, align 16
; CHECK: Cost of Invalid for VF vscale x 4: REPLICATE ir<%0> = load ir<%arrayidx>
; CHECK: Cost of Invalid for VF vscale x 4: REPLICATE store ir<%add>, ir<%arrayidx>
entry:
br label %for.body

View File

@@ -13,15 +13,11 @@ entry:
br label %for.body
; VF_8-LABEL: Checking a loop in 'i8_factor_2'
; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1
; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i8_factor_2'
; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1
; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1
; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
@@ -44,20 +40,14 @@ entry:
br label %for.body
; VF_4-LABEL: Checking a loop in 'i16_factor_2'
; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_8-LABEL: Checking a loop in 'i16_factor_2'
; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i16_factor_2'
; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2
; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
@@ -80,25 +70,17 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i32_factor_2'
; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_4-LABEL: Checking a loop in 'i32_factor_2'
; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_8-LABEL: Checking a loop in 'i32_factor_2'
; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_16-LABEL: Checking a loop in 'i32_factor_2'
; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4
; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
@@ -121,15 +103,11 @@ entry:
br label %for.body
; VF_4-LABEL: Checking a loop in 'half_factor_2'
; VF_4: Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp2, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store half %tmp3, ptr %tmp1, align 2
; VF_4: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_4: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; VF_8-LABEL: Checking a loop in 'half_factor_2'
; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp2, ptr %tmp0, align 2
; VF_8-NEXT: Found an estimated cost of 80 for VF 8 For instruction: store half %tmp3, ptr %tmp1, align 2
; VF_8: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0>
; VF_8: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %half.2, ptr %data, i64 %i, i32 0

View File

@@ -32,7 +32,8 @@ for.inc:
for.end.loopexit:
ret void
; CHECK: LV: Found an estimated cost of 7 for VF 2 For instruction: br i1 %cmp55, label %if.then, label %for.inc
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: br i1 %exitcond, label %for.end.loopexit, label %for.body
; CHECK: Cost of 1 for VF 2: profitable to scalarize store i32 %sub, ptr %arrayidx, align 4
; CHECK: Cost of 2 for VF 2: profitable to scalarize %sub = sub nsw i32 0, %l
; CHECK: Cost of 1 for VF 2: WIDEN ir<%cmp55> = icmp sgt ir<%l>, ir<0>
; CHECK: Cost of 1 for VF 2: vector loop backedge
}

View File

@@ -24,6 +24,6 @@ for.body:
for.end:
ret void
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %mul = mul nsw i64 %iv, %s
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %ld = load i64, ptr %bct
; CHECK: Cost of 2 for VF 2: forced scalar %mul = mul nsw i64 %iv, %s
; CHECK: Cost of 2 for VF 2: REPLICATE ir<%ld> = load ir<%bct>
}

View File

@@ -24,5 +24,5 @@ for.body:
for.end:
ret i32 %acc_next
; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %ld = load i32, ptr %gep
; CHECK: Cost of 4 for VF 4: REPLICATE ir<%ld> = load ir<%gep>
}

View File

@@ -6,8 +6,8 @@
; Check cost function for <8 x i128> store interleave group.
; CHECK: LV: Checking a loop in 'fun'
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113
; CHECK: Cost of 4 for VF 4: REPLICATE store ir<8721036757475490113>, ir<%arrayidx10.i>
; CHECK: Cost of 4 for VF 4: REPLICATE store ir<8721036757475490113>, ir<%arrayidx10.i.c>
define noundef i32 @fun(i32 %argc, ptr nocapture readnone %argv) {
entry:

View File

@@ -27,7 +27,7 @@ for.end:
ret void
; CHECK: LV: Creating an interleave group with: %tmp1 = load i32, ptr %tmp0, align 4
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %tmp1 = load i32, ptr %tmp0, align 4
; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp1, ir<%tmp0>
; (vl; vl; vperm)
}
@@ -59,12 +59,10 @@ for.end:
; CHECK: LV: Inserted: %tmp1 = load i32, ptr %tmp0, align 4
; CHECK: into the interleave group with %tmp3 = load i32, ptr %tmp2, align 4
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, ptr %tmp0, align 4
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp2, align 4
; CHECK: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp1, ir<%tmp0>
; (vl; vl; vperm, vpkg)
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp1, ptr %tmp2, align 4
; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4
; CHECK: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0>
; (vmrlf; vmrhf; vst; vst)
}

View File

@@ -5,8 +5,8 @@
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: phi_two_incoming_values
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ]
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ]
; CHECK: Cost of 1 for VF 2: induction instruction %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ]
; CHECK: Cost of 1 for VF 2: BLEND ir<%tmp5> = ir<%tmp1> ir<%tmp4>/ir<%tmp3>
;
define void @phi_two_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) {
; CHECK-LABEL: define void @phi_two_incoming_values(
@@ -82,8 +82,8 @@ for.end:
}
; CHECK-LABEL: phi_three_incoming_values
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ]
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %tmp7, %if.else ]
; CHECK: Cost of 1 for VF 2: induction instruction %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ]
; CHECK: Cost of 2 for VF 2: BLEND ir<%tmp8> = ir<%tmp7> ir<3>/vp<{{.*}}> ir<9>/vp<{{.*}}>
;
define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) {
; CHECK-LABEL: define void @phi_three_incoming_values(

View File

@@ -3,7 +3,7 @@
%struct.foo = type { i32, i64 }
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %0 = bitcast ptr %b to ptr
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%0> = bitcast ir<%b> to ptr
; The bitcast below will be scalarized due to the predication in the loop. Bitcasts
; between pointer types should be treated as free, despite the scalarization.