diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3a1f820cc16d..98803faa563d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -885,13 +885,6 @@ public: /// otherwise. bool runtimeChecksRequired(); - /// Setup cost-based decisions for user vectorization factor. - /// \return true if the UserVF is a feasible VF to be chosen. - bool selectUserVectorizationFactor(ElementCount UserVF) { - collectNonVectorizedAndSetWideningDecisions(UserVF); - return expectedCost(UserVF).isValid(); - } - /// \return True if maximizing vector bandwidth is enabled by the target or /// user options, for the given register kind. bool useMaxBandwidth(TargetTransformInfo::RegisterKind RegKind); @@ -4983,15 +4976,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { InstructionCost Cost; - - // If the vector loop gets executed exactly once with the given VF, ignore the - // costs of comparison and induction instructions, as they'll get simplified - // away. - SmallPtrSet ValuesToIgnoreForVF; - auto TC = getSmallConstantTripCount(PSE.getSE(), TheLoop); - if (TC == VF && !foldTailByMasking()) - addFullyUnrolledInstructionsToIgnore(TheLoop, Legal->getInductionVars(), - ValuesToIgnoreForVF); + assert(VF.isScalar() && "must only be called for scalar VFs"); // For each block. for (BasicBlock *BB : TheLoop->blocks()) { @@ -5000,43 +4985,26 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { // For each instruction in the old loop. for (Instruction &I : *BB) { // Skip ignored values. - if (ValuesToIgnore.count(&I) || ValuesToIgnoreForVF.count(&I) || + if (ValuesToIgnore.count(&I) || (VF.isVector() && VecValuesToIgnore.count(&I))) continue; InstructionCost C = getInstructionCost(&I, VF); // Check if we should override the cost. 
- if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) { - // For interleave groups, use ForceTargetInstructionCost once for the - // whole group. - if (VF.isVector() && getWideningDecision(&I, VF) == CM_Interleave) { - if (getInterleavedAccessGroup(&I)->getInsertPos() == &I) - C = InstructionCost(ForceTargetInstructionCost); - else - C = InstructionCost(0); - } else { - C = InstructionCost(ForceTargetInstructionCost); - } - } + if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) + C = InstructionCost(ForceTargetInstructionCost); BlockCost += C; LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " << VF << " For instruction: " << I << '\n'); } - // If we are vectorizing a predicated block, it will have been - // if-converted. This means that the block's instructions (aside from - // stores and instructions that may divide by zero) will now be - // unconditionally executed. For the scalar case, we may not always execute - // the predicated block, if it is an if-else block. Thus, scale the block's - // cost by the probability of executing it. - // getPredBlockCostDivisor will return 1 for blocks that are only predicated - // by the header mask when folding the tail. - if (VF.isScalar()) - BlockCost /= getPredBlockCostDivisor(CostKind, BB); - - Cost += BlockCost; + // In the scalar loop, we may not always execute the predicated block, if it + // is an if-else block. Thus, scale the block's cost by the probability of + // executing it. getPredBlockCostDivisor will return 1 for blocks that are + // only predicated by the header mask when folding the tail. + Cost += BlockCost / getPredBlockCostDivisor(CostKind, BB); } return Cost; @@ -6636,20 +6604,26 @@ void LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. 
CM.collectInLoopReductions(); - if (CM.selectUserVectorizationFactor(UserVF)) { - LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); - ElementCount EpilogueUserVF = - ElementCount::getFixed(EpilogueVectorizationForceVF); - if (EpilogueUserVF.isVector() && - ElementCount::isKnownLT(EpilogueUserVF, UserVF) && - CM.selectUserVectorizationFactor(EpilogueUserVF)) { - // Build a separate plan for the forced epilogue VF. - buildVPlansWithVPRecipes(EpilogueUserVF, EpilogueUserVF); - } - buildVPlansWithVPRecipes(UserVF, UserVF); - LLVM_DEBUG(printPlans(dbgs())); - return; + CM.collectNonVectorizedAndSetWideningDecisions(UserVF); + ElementCount EpilogueUserVF = + ElementCount::getFixed(EpilogueVectorizationForceVF); + if (EpilogueUserVF.isVector() && + ElementCount::isKnownLT(EpilogueUserVF, UserVF)) { + CM.collectNonVectorizedAndSetWideningDecisions(EpilogueUserVF); + buildVPlansWithVPRecipes(EpilogueUserVF, EpilogueUserVF); } + buildVPlansWithVPRecipes(UserVF, UserVF); + if (!VPlans.empty() && VPlans.back()->getSingleVF() == UserVF) { + // For scalar VF, skip VPlan cost check as VPlan cost is designed for + // vector VFs only. 
+ if (UserVF.isScalar() || + cost(*VPlans.back(), UserVF, /*RU=*/nullptr).isValid()) { + LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); + LLVM_DEBUG(printPlans(dbgs())); + return; + } + } + VPlans.clear(); reportVectorizationInfo("UserVF ignored because of invalid costs.", "InvalidCost", ORE, OrigLoop); } @@ -6864,7 +6838,7 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF, Cost += Plan.cost(VF, CostCtx); // Add the cost of spills due to excess register usage - if (CM.shouldConsiderRegPressureForVF(VF)) + if (RU && CM.shouldConsiderRegPressureForVF(VF)) Cost += RU->spillCost(CostCtx, ForceTargetNumVectorRegs); #ifndef NDEBUG diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll index d1d1b0ab100f..5ae0a536a228 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll @@ -7,8 +7,8 @@ target triple = "aarch64--linux-gnu" %pair = type { i8, i8 } ; CHECK-LABEL: test -; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8 -; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8 +; CHECK: Cost of 8 for VF 2: REPLICATE ir<%tmp1> = load ir<%tmp0> +; CHECK: Cost of 8 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp2> ; CHECK-LABEL: entry: ; CHECK-LABEL: vector.body: ; CHECK: [[LOAD1:%.*]] = load i8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll index a550f1ca14c8..44cc7bbc9294 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -13,15 +13,11 @@ entry: br label %for.body ; VF_8-LABEL: Checking a loop in 'i8_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, 
align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -44,20 +40,14 @@ entry: br label %for.body ; VF_4-LABEL: Checking a loop in 'i16_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i16_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For
instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i16_factor_2' -; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0 @@ -80,25 +70,17 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_4-LABEL: Checking a loop in 'i32_factor_2' -;
VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i32_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i32_factor_2' -; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0 @@ -121,25 +103,17 @@ entry: br label %for.body ;
VF_2-LABEL: Checking a loop in 'i64_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_2: Cost of 1 for VF 2: WIDEN ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 1 for VF 2: WIDEN store ir<%tmp0>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'i64_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i64_factor_2' -; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i64_factor_2' -; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_16-NEXT:
Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0 @@ -168,10 +142,10 @@ entry: ; gaps. ; ; VF_2-LABEL: Checking a loop in 'i64_factor_8' -; VF_2: Found an estimated cost of 8 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_2: Cost of 8 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 8 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 8 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 8 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll index 79b4c1767a56..e007b706d3d4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -46,7 +46,7 @@ for.inc: ; CHECK-COST: Checking a loop in 'scalable' -; CHECK-COST: Found an estimated cost of 1 for VF vscale x 4 For instruction: store i32 2, ptr %arrayidx1, align 4
+; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN store vp<{{.+}}>, ir<2>, ir<{{.+}}> define void @scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @scalable( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll index ac292e408b91..c2d5ec2c1933 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll @@ -6,8 +6,8 @@ target triple="aarch64-unknown-linux-gnu" -; CHECK-VF4: Found an estimated cost of 14 for VF 4 For instruction: %add = fadd float %0, %sum.07 -; CHECK-VF8: Found an estimated cost of 28 for VF 8 For instruction: %add = fadd float %0, %sum.07 +; CHECK-VF4: Cost of 14 for VF 4: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>) +; CHECK-VF8: Cost of 28 for VF 8: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>) define float @fadd_strict32(ptr noalias nocapture readonly %a, i64 %n) { entry: @@ -28,8 +28,8 @@ for.end: } -; CHECK-VF4: Found an estimated cost of 12 for VF 4 For instruction: %add = fadd double %0, %sum.07 -; CHECK-VF8: Found an estimated cost of 24 for VF 8 For instruction: %add = fadd double %0, %sum.07 +; CHECK-VF4: Cost of 12 for VF 4: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>) +; CHECK-VF8: Cost of 24 for VF 8: REDUCE ir<%add> = ir<%sum.07> + reduce.fadd (ir<%0>) define double @fadd_strict64(ptr noalias nocapture readonly %a, i64 %n) { entry: @@ -49,8 +49,8 @@ for.end: ret double %add } -; CHECK-VF4: Found an estimated cost of 16 for VF 4 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) -; CHECK-VF8: Found an estimated cost of 32 for VF 8 For instruction: %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) +; CHECK-VF4: Cost of 14 for VF 4: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>) +; CHECK-VF8: Cost of 28 for VF 
8: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>) define float @fmuladd_strict32(ptr %a, ptr %b, i64 %n) { entry: @@ -73,8 +73,8 @@ for.end: } -; CHECK-VF4: Found an estimated cost of 16 for VF 4 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) -; CHECK-VF8: Found an estimated cost of 32 for VF 8 For instruction: %muladd = tail call double @llvm.fmuladd.f64(double %0, double %1, double %sum.07) +; CHECK-VF4: Cost of 12 for VF 4: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>) +; CHECK-VF8: Cost of 24 for VF 8: REDUCE ir<%muladd> = ir<%sum.07> + reduce.fadd (vp<{{.+}}>) define double @fmuladd_strict64(ptr %a, ptr %b, i64 %n) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll index fc4be701b008..0531261c1503 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll @@ -5,7 +5,7 @@ target triple="aarch64--linux-gnu" ; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx3, align 4 +; CHECK: Cost of 81 for VF vscale x 4: WIDEN ir<%1> = load ir<%arrayidx3> define void @gather_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { entry: br label %for.body @@ -27,7 +27,7 @@ for.cond.cleanup: } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, ptr %arrayidx5, align 4 +; CHECK: Cost of 81 for VF vscale x 4: WIDEN store ir<%arrayidx5>, ir<%1> define void @scatter_nxv4i32_loaded_index(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture %c, i64 %n) #0 { 
entry: br label %for.body @@ -51,7 +51,7 @@ for.cond.cleanup: ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1. ; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride' -; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +; CHECK: Cost of 1 for VF vscale x 4: WIDEN ir<%0> = load vp<{{.+}}> define void @gather_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: br label %for.body @@ -74,7 +74,7 @@ for.cond.cleanup: ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and cost is 1. ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride' -; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +; CHECK: Cost of 1 for VF vscale x 4: WIDEN store vp<{{.+}}>, ir<%0> define void @scatter_nxv4i32_unknown_stride(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: br label %for.body @@ -95,7 +95,7 @@ for.cond.cleanup: } ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %0, ir<%arrayidx> define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body @@ -116,7 +116,7 @@ for.cond.cleanup: } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +; CHECK: Cost of 81 for VF 
vscale x 4: WIDEN store ir<%arrayidx2>, ir<%0> define void @scatter_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body @@ -138,7 +138,7 @@ for.cond.cleanup: ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +; CHECK: Cost of 81 for VF vscale x 4: WIDEN ir<%0> = load ir<%arrayidx> define void @gather_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body @@ -159,7 +159,7 @@ for.cond.cleanup: } ; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, ptr %arrayidx2, align 4 +; CHECK: Cost of 81 for VF vscale x 4: WIDEN store ir<%arrayidx2>, ir<%0> define void @scatter_nxv4i32_stride64(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-invalid-costs.ll index 10f486676f1f..6013e2b68ff7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-invalid-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-invalid-costs.ll @@ -5,8 +5,8 @@ target triple = "aarch64-linux-gnu" define void @loop_sve_i1(ptr nocapture %ptr, i64 %N) { ; CHECK-LABEL: LV: Checking a loop in 'loop_sve_i1' -; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %0 = load i1, ptr %arrayidx, align 16 -; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store i1 %add, ptr %arrayidx, align 16 +; CHECK: Cost of Invalid for VF vscale x 4: REPLICATE ir<%0> = load ir<%arrayidx> +; CHECK: Cost of Invalid for VF vscale x 4: REPLICATE store ir<%add>, ir<%arrayidx> entry: br label %for.body diff --git 
a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll index 214d9abd712c..c3cfd48208e8 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll @@ -13,15 +13,11 @@ entry: br label %for.body ; VF_8-LABEL: Checking a loop in 'i8_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -44,20 +40,14 @@ entry: br label %for.body ; VF_4-LABEL: Checking a loop in 'i16_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1,
align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i16_factor_2' -; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i16_factor_2' -; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0 @@ -80,25 +70,17 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_2' -; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2
For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_4-LABEL: Checking a loop in 'i32_factor_2' -; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i32_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_16-LABEL: Checking a loop in 'i32_factor_2' -; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32
%tmp2, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0 @@ -121,15 +103,11 @@ entry: br label %for.body ; VF_4-LABEL: Checking a loop in 'half_factor_2' -; VF_4: Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_4: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'half_factor_2' -; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 80 for VF 8 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_8: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %half.2, ptr %data, i64 %i, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll index
551b85b7d035..76898f5010e8 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll @@ -15,25 +15,19 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i8_factor_2' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_4-LABEL: Checking a loop in 'i8_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i8_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For 
instruction: store i8 %tmp3, ptr %tmp1, align 1 -; VF_16-LABEL: Checking a loop in 'i8_factor_2' -; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 4 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1 +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'i8_factor_2' +; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -56,25 +50,19 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i16_factor_2' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_4-LABEL: Checking a loop in 'i16_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 
For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i16_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2 -; VF_16-LABEL: Checking a loop in 'i16_factor_2' -; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2 +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'i16_factor_2' +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0 @@ -97,25 +85,19 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_2' -; VF_2: Found an estimated cost of 12 for VF 2 For 
instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_4-LABEL: Checking a loop in 'i32_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'i32_factor_2' -; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4 -; VF_16-LABEL: Checking a loop in 'i32_factor_2' -; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4 -; 
VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4 +; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'i32_factor_2' +; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0 @@ -138,25 +120,25 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_2' -; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_2: Cost of 22 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 22 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_4-LABEL: Checking a loop in 'i64_factor_2' -; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_4: Cost of 44 for VF 4: 
REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_4-NEXT: Cost of 44 for VF 4: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_8-LABEL: Checking a loop in 'i64_factor_2' -; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8 -; VF_16-LABEL: Checking a loop in 'i64_factor_2' -; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8 +; VF_8: Cost of 88 for VF 8: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_8-NEXT: Cost of 88 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp1> +; VF_16-LABEL: Checking a loop in 'i64_factor_2' +; VF_16: Cost of 176 for VF 16: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_16-NEXT: Cost of 176 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp1> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0 @@ -179,25 +161,19 @@ entry: br label 
%for.body ; VF_2-LABEL: Checking a loop in 'f16_factor_2' -; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_2: Cost of 6 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_4-LABEL: Checking a loop in 'f16_factor_2' -; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 18 for VF 4 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_4: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'f16_factor_2' -; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store half %tmp3, ptr %tmp1, align 2 -; VF_16-LABEL: Checking a loop in 'f16_factor_2' -; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, ptr 
%tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp2, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 8 for VF 16 For instruction: store half %tmp3, ptr %tmp1, align 2 +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'f16_factor_2' +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 0 @@ -220,25 +196,17 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f32_factor_2' -; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp2, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 10 for VF 2 For instruction: store float %tmp3, ptr %tmp1, align 4 +; VF_2: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_2: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; VF_4-LABEL: Checking a loop in 'f32_factor_2' -; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp2, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store float %tmp3, ptr %tmp1, align 4 +; VF_4: Cost of 
4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_4: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'f32_factor_2' -; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp2, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store float %tmp3, ptr %tmp1, align 4 -; VF_16-LABEL: Checking a loop in 'f32_factor_2' -; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp2, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 16 for VF 16 For instruction: store float %tmp3, ptr %tmp1, align 4 +; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_8: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'f32_factor_2' +; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at %tmp2, ir<%tmp0> +; VF_16: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 0 @@ -261,25 +229,25 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f64_factor_2' -; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp2, ptr %tmp0, 
align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_2: Cost of 6 for VF 2: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_4-LABEL: Checking a loop in 'f64_factor_2' -; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_4: Cost of 12 for VF 4: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp3>, ir<%tmp1> ; VF_8-LABEL: Checking a loop in 'f64_factor_2' -; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8 -; VF_16-LABEL: Checking a loop in 'f64_factor_2' -; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp2, 
ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8 +; VF_8: Cost of 24 for VF 8: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp1> +; VF_16-LABEL: Checking a loop in 'f64_factor_2' +; VF_16: Cost of 48 for VF 16: REPLICATE ir<%tmp2> = load ir<%tmp0> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp2>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp1> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0 @@ -306,33 +274,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i8_factor_3' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE 
store ir<%tmp5>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'i8_factor_3' -; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_4: Cost of 24 for VF 4: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_8-LABEL: Checking a loop in 'i8_factor_3' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1 -; VF_16-LABEL: Checking a loop in 'i8_factor_3' -; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for 
VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1 +; VF_8: Cost of 48 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp2> +; VF_16-LABEL: Checking a loop in 'i8_factor_3' +; VF_16: Cost of 96 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp2> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0 @@ -358,33 +326,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i16_factor_3' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For 
instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'i16_factor_3' -; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_4: Cost of 24 for VF 4: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_8-LABEL: Checking a loop in 'i16_factor_3' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For 
instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2 -; VF_16-LABEL: Checking a loop in 'i16_factor_3' -; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2 +; VF_8: Cost of 48 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp2> +; VF_16-LABEL: Checking a loop in 'i16_factor_3' +; VF_16: Cost of 96 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store 
ir<%tmp4>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp2> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0 @@ -410,33 +378,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_3' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'i32_factor_3' -; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For 
instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_4: Cost of 8 for VF 4: WIDEN ir<%tmp3> = load ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp4> = load ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp5> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp0>, ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp1>, ir<%tmp4> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp2>, ir<%tmp5> ; VF_8-LABEL: Checking a loop in 'i32_factor_3' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4 -; VF_16-LABEL: Checking a loop in 'i32_factor_3' -; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4 +; VF_8: Cost of 48 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_8-NEXT: Cost of 48 for VF 8: 
REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp2> +; VF_16-LABEL: Checking a loop in 'i32_factor_3' +; VF_16: Cost of 96 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp2> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0 @@ -462,33 +430,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_3' -; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_2: Cost of 22 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_2-NEXT: Cost of 22 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_2-NEXT: Cost of 22 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store 
ir<%tmp5>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'i64_factor_3' -; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_4: Cost of 44 for VF 4: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_4-NEXT: Cost of 44 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_4-NEXT: Cost of 44 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_8-LABEL: Checking a loop in 'i64_factor_3' -; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8 -; VF_16-LABEL: Checking a loop in 'i64_factor_3' -; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8 -; VF_16-NEXT: Found an 
estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8 +; VF_8: Cost of 88 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_8-NEXT: Cost of 88 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_8-NEXT: Cost of 88 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp2> +; VF_16-LABEL: Checking a loop in 'i64_factor_3' +; VF_16: Cost of 176 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_16-NEXT: Cost of 176 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_16-NEXT: Cost of 176 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp2> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0 @@ -514,33 +482,21 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f16_factor_3' -; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2 -; VF_2-NEXT: Found an 
estimated cost of 4 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2 +; VF_2: Cost of 6 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'f16_factor_3' -; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp3, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp4, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 28 for VF 4 For instruction: store half %tmp5, ptr %tmp2, align 2 +; VF_4: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 3 at %tmp3, ir<%tmp0> +; VF_4: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'f16_factor_3' -; VF_8: Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store 
half %tmp3, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp4, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 56 for VF 8 For instruction: store half %tmp5, ptr %tmp2, align 2 -; VF_16-LABEL: Checking a loop in 'f16_factor_3' -; VF_16: Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp3, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp4, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 112 for VF 16 For instruction: store half %tmp5, ptr %tmp2, align 2 +; VF_8: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 3 at %tmp3, ir<%tmp0> +; VF_8: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'f16_factor_3' +; VF_16: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 3 at %tmp3, ir<%tmp0> +; VF_16: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 0 @@ -566,33 +522,21 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f32_factor_3' -; VF_2: Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp3, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost 
of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store float %tmp5, ptr %tmp2, align 4 +; VF_2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at %tmp3, ir<%tmp0> +; VF_2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%tmp0> ; VF_4-LABEL: Checking a loop in 'f32_factor_3' -; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4 +; VF_4: Cost of 8 for VF 4: WIDEN ir<%tmp3> = load ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp4> = load ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp5> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp0>, ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp1>, ir<%tmp4> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp2>, ir<%tmp5> ; VF_8-LABEL: Checking a loop in 'f32_factor_3' -; VF_8: Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp3, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp4, ptr %tmp1, align 
4 -; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store float %tmp5, ptr %tmp2, align 4 -; VF_16-LABEL: Checking a loop in 'f32_factor_3' -; VF_16: Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp3, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp4, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store float %tmp5, ptr %tmp2, align 4 +; VF_8: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 3 at %tmp3, ir<%tmp0> +; VF_8: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'f32_factor_3' +; VF_16: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 3 at %tmp3, ir<%tmp0> +; VF_16: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 0 @@ -618,33 +562,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f64_factor_3' -; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 
For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_2: Cost of 6 for VF 2: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_4-LABEL: Checking a loop in 'f64_factor_3' -; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_4: Cost of 12 for VF 4: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp2> ; VF_8-LABEL: Checking a loop in 'f64_factor_3' -; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_8-NEXT: Found an 
estimated cost of 16 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8 -; VF_16-LABEL: Checking a loop in 'f64_factor_3' -; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8 +; VF_8: Cost of 24 for VF 8: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp2> +; VF_16-LABEL: Checking a loop in 'f64_factor_3' +; VF_16: Cost of 48 for VF 16: REPLICATE ir<%tmp3> = load ir<%tmp0> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp1> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp3>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp2> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 
%tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0 @@ -673,41 +617,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i8_factor_4' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1 -; VF_4-LABEL: Checking a loop in 'i8_factor_4' -; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp5> 
= load ir<%tmp1> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp7>, ir<%tmp3> +; VF_4-LABEL: Checking a loop in 'i8_factor_4' +; VF_4: Cost of 24 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_8-LABEL: Checking a loop in 'i8_factor_4' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1 -; VF_16-LABEL: Checking a loop in 'i8_factor_4' -; 
VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1 +; VF_8: Cost of 48 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp7>, ir<%tmp3> +; VF_16-LABEL: Checking a loop in 'i8_factor_4' +; VF_16: Cost of 96 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_16-NEXT: 
Cost of 32 for VF 16: REPLICATE store ir<%tmp7>, ir<%tmp3> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0 @@ -736,41 +680,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i16_factor_4' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_4-LABEL: Checking a loop in 'i16_factor_4' -; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 
For instruction: %tmp6 = load i16, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_4: Cost of 24 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_4-NEXT: Cost of 24 for VF 4: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_8-LABEL: Checking a loop in 'i16_factor_4' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For 
instruction: store i16 %tmp7, ptr %tmp3, align 2 -; VF_16-LABEL: Checking a loop in 'i16_factor_4' -; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2 +; VF_8: Cost of 48 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp7>, ir<%tmp3> +; VF_16-LABEL: Checking a loop in 'i16_factor_4' +; VF_16: Cost of 96 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE 
store ir<%tmp5>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp7>, ir<%tmp3> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0 @@ -799,41 +743,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i32_factor_4' -; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4 +; VF_2: Cost of 12 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_2-NEXT: Cost of 12 for VF 2: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_4-LABEL: Checking a loop in 'i32_factor_4' -; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 
for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4 +; VF_4: Cost of 8 for VF 4: WIDEN ir<%tmp4> = load ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp5> = load ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp6> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp7> = load ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp0>, ir<%tmp4> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp1>, ir<%tmp5> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp2>, ir<%tmp6> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp3>, ir<%tmp7> ; VF_8-LABEL: Checking a loop in 'i32_factor_4' -; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp6, ptr 
%tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4 -; VF_16-LABEL: Checking a loop in 'i32_factor_4' -; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4 +; VF_8: Cost of 48 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_8-NEXT: Cost of 48 for VF 8: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp7>, ir<%tmp3> +; VF_16-LABEL: Checking a loop in 'i32_factor_4' +; VF_16: Cost of 96 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_16-NEXT: Cost of 96 for VF 16: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store 
ir<%tmp4>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp7>, ir<%tmp3> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0 @@ -862,41 +806,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'i64_factor_4' -; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_2: Cost of 22 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_2-NEXT: Cost of 22 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_2-NEXT: Cost of 22 for VF 2: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_2-NEXT: Cost of 22 for VF 2: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_4-LABEL: Checking a loop in 'i64_factor_4' -; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load 
i64, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_4: Cost of 44 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_4-NEXT: Cost of 44 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_4-NEXT: Cost of 44 for VF 4: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_4-NEXT: Cost of 44 for VF 4: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_8-LABEL: Checking a loop in 'i64_factor_4' -; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 
%tmp5, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8 -; VF_16-LABEL: Checking a loop in 'i64_factor_4' -; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8 +; VF_8: Cost of 88 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_8-NEXT: Cost of 88 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_8-NEXT: Cost of 88 for VF 8: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_8-NEXT: Cost of 88 for VF 8: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE store ir<%tmp7>, ir<%tmp3> +; VF_16-LABEL: Checking a loop in 'i64_factor_4' +; VF_16: Cost of 176 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_16-NEXT: Cost of 176 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_16-NEXT: Cost of 176 for VF 16: REPLICATE ir<%tmp6> = load ir<%tmp2> +; 
VF_16-NEXT: Cost of 176 for VF 16: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE store ir<%tmp7>, ir<%tmp3> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0 @@ -925,41 +869,17 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f16_factor_4' -; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp4 = load half, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, ptr %tmp3, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp4, ptr %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp5, ptr %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp6, ptr %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store half %tmp7, ptr %tmp3, align 2 +; VF_2: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; VF_2: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> ; VF_4-LABEL: Checking a loop in 'f16_factor_4' -; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost 
of 0 for VF 4 For instruction: %tmp7 = load half, ptr %tmp3, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp4, ptr %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp5, ptr %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half %tmp6, ptr %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 36 for VF 4 For instruction: store half %tmp7, ptr %tmp3, align 2 +; VF_4: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; VF_4: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> ; VF_8-LABEL: Checking a loop in 'f16_factor_4' -; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, ptr %tmp3, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp4, ptr %tmp0, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp5, ptr %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half %tmp6, ptr %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 72 for VF 8 For instruction: store half %tmp7, ptr %tmp3, align 2 -; VF_16-LABEL: Checking a loop in 'f16_factor_4' -; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 
= load half, ptr %tmp3, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp4, ptr %tmp0, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp5, ptr %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half %tmp6, ptr %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 144 for VF 16 For instruction: store half %tmp7, ptr %tmp3, align 2 +; VF_8: Cost of 72 for VF 8: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; VF_8: Cost of 72 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'f16_factor_4' +; VF_16: Cost of 144 for VF 16: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; VF_16: Cost of 144 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 0 @@ -988,41 +908,23 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f32_factor_4' -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp4 = load float, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, ptr %tmp3, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp5, ptr %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp6, ptr %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 20 for VF 2 For instruction: store float %tmp7, ptr %tmp3, align 4 +; VF_2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; 
VF_2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> ; VF_4-LABEL: Checking a loop in 'f32_factor_4' -; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4 +; VF_4: Cost of 8 for VF 4: WIDEN ir<%tmp4> = load ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp5> = load ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp6> = load ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN ir<%tmp7> = load ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp0>, ir<%tmp4> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp1>, ir<%tmp5> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp2>, ir<%tmp6> +; VF_4-NEXT: Cost of 8 for VF 4: WIDEN store ir<%tmp3>, ir<%tmp7> ; VF_8-LABEL: Checking a loop in 'f32_factor_4' -; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, ptr %tmp3, align 4 -; VF_8-NEXT: 
Found an estimated cost of 0 for VF 8 For instruction: store float %tmp4, ptr %tmp0, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp5, ptr %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float %tmp6, ptr %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 80 for VF 8 For instruction: store float %tmp7, ptr %tmp3, align 4 -; VF_16-LABEL: Checking a loop in 'f32_factor_4' -; VF_16: Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, ptr %tmp3, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp4, ptr %tmp0, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp5, ptr %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float %tmp6, ptr %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 160 for VF 16 For instruction: store float %tmp7, ptr %tmp3, align 4 +; VF_8: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; VF_8: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> +; VF_16-LABEL: Checking a loop in 'f32_factor_4' +; VF_16: Cost of 160 for VF 16: INTERLEAVE-GROUP with factor 4 at %tmp4, ir<%tmp0> +; VF_16: Cost of 160 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%tmp0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 0 @@ -1051,41 +953,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in 'f64_factor_4' -; VF_2: Found an estimated cost of 6 for VF 2 For instruction: 
%tmp4 = load double, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 4 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_2: Cost of 6 for VF 2: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_2-NEXT: Cost of 6 for VF 2: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_2-NEXT: Cost of 4 for VF 2: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_4-LABEL: Checking a loop in 'f64_factor_4' -; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for 
VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_4: Cost of 12 for VF 4: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_4-NEXT: Cost of 12 for VF 4: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_4-NEXT: Cost of 8 for VF 4: REPLICATE store ir<%tmp7>, ir<%tmp3> ; VF_8-LABEL: Checking a loop in 'f64_factor_4' -; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 16 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8 -; VF_16-LABEL: Checking a loop in 'f64_factor_4' -; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = 
load double, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 32 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8 +; VF_8: Cost of 24 for VF 8: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_8-NEXT: Cost of 24 for VF 8: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_8-NEXT: Cost of 16 for VF 8: REPLICATE store ir<%tmp7>, ir<%tmp3> +; VF_16-LABEL: Checking a loop in 'f64_factor_4' +; VF_16: Cost of 48 for VF 16: REPLICATE ir<%tmp4> = load ir<%tmp0> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE ir<%tmp5> = load ir<%tmp1> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE ir<%tmp6> = load ir<%tmp2> +; VF_16-NEXT: Cost of 48 for VF 16: REPLICATE ir<%tmp7> = load ir<%tmp3> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp4>, ir<%tmp0> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp5>, ir<%tmp1> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp6>, ir<%tmp2> +; VF_16-NEXT: Cost of 32 for VF 16: REPLICATE store ir<%tmp7>, ir<%tmp3> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds 
%f64.4, ptr %data, i64 %i, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll index c8686d97838b..677b94163592 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/branch-for-predicated-block.ll @@ -32,7 +32,8 @@ for.inc: for.end.loopexit: ret void -; CHECK: LV: Found an estimated cost of 7 for VF 2 For instruction: br i1 %cmp55, label %if.then, label %for.inc -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: br i1 %exitcond, label %for.end.loopexit, label %for.body +; CHECK: Cost of 1 for VF 2: profitable to scalarize store i32 %sub, ptr %arrayidx, align 4 +; CHECK: Cost of 2 for VF 2: profitable to scalarize %sub = sub nsw i32 0, %l +; CHECK: Cost of 1 for VF 2: WIDEN ir<%cmp55> = icmp sgt ir<%l>, ir<0> +; CHECK: Cost of 1 for VF 2: vector loop backedge } diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll index 02f8bc61cbb6..b6e5862d351c 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll @@ -24,6 +24,6 @@ for.body: for.end: ret void -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %mul = mul nsw i64 %iv, %s -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %ld = load i64, ptr %bct +; CHECK: Cost of 2 for VF 2: forced scalar %mul = mul nsw i64 %iv, %s +; CHECK: Cost of 2 for VF 2: REPLICATE ir<%ld> = load ir<%bct> } diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll index 6de9a175d55a..1f88f5771a2e 
100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-1.ll @@ -24,5 +24,5 @@ for.body: for.end: ret i32 %acc_next -; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %ld = load i32, ptr %gep +; CHECK: Cost of 4 for VF 4: REPLICATE ir<%ld> = load ir<%gep> } diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll index eb03fed1ccfb..d3e0eb847780 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll @@ -6,8 +6,8 @@ ; Check cost function for <8 x i128> store interleave group. ; CHECK: LV: Checking a loop in 'fun' -; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113 -; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113 +; CHECK: Cost of 4 for VF 4: REPLICATE store ir<8721036757475490113>, ir<%arrayidx10.i> +; CHECK: Cost of 4 for VF 4: REPLICATE store ir<8721036757475490113>, ir<%arrayidx10.i.c> define noundef i32 @fun(i32 %argc, ptr nocapture readnone %argv) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll index ee81af23c933..714717b24395 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs.ll @@ -27,7 +27,7 @@ for.end: ret void ; CHECK: LV: Creating an interleave group with: %tmp1 = load i32, ptr %tmp0, align 4 -; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: %tmp1 = load i32, ptr %tmp0, align 4 +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp1, ir<%tmp0> ; (vl; vl; vperm) } @@ -59,12 +59,10 @@ for.end: ; 
CHECK: LV: Inserted: %tmp1 = load i32, ptr %tmp0, align 4 ; CHECK: into the interleave group with %tmp3 = load i32, ptr %tmp2, align 4 -; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: %tmp1 = load i32, ptr %tmp0, align 4 -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp2, align 4 +; CHECK: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %tmp1, ir<%tmp0> ; (vl; vl; vperm, vpkg) -; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp1, ptr %tmp2, align 4 -; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4 +; CHECK: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%tmp0> ; (vmrlf; vmrhf; vst; vst) } diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll index 876e0411dff6..d0a9073d5e5e 100644 --- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll @@ -5,8 +5,8 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: phi_two_incoming_values -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ] +; CHECK: Cost of 1 for VF 2: induction instruction %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] +; CHECK: Cost of 1 for VF 2: BLEND ir<%tmp5> = ir<%tmp1> ir<%tmp4>/ir<%tmp3> ; define void @phi_two_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) { ; CHECK-LABEL: define void @phi_two_incoming_values( @@ -82,8 +82,8 @@ for.end: } ; CHECK-LABEL: phi_three_incoming_values -; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] -; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], 
[ %tmp7, %if.else ] +; CHECK: Cost of 1 for VF 2: induction instruction %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] +; CHECK: Cost of 2 for VF 2: BLEND ir<%tmp8> = ir<%tmp7> ir<3>/vp<{{.*}}> ir<9>/vp<{{.*}}> ; define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) { ; CHECK-LABEL: define void @phi_three_incoming_values( diff --git a/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll index 74374a014b82..0886d040c268 100644 --- a/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll +++ b/llvm/test/Transforms/LoopVectorize/scalarized-bitcast.ll @@ -3,7 +3,7 @@ %struct.foo = type { i32, i64 } -; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %0 = bitcast ptr %b to ptr +; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%0> = bitcast ir<%b> to ptr ; The bitcast below will be scalarized due to the predication in the loop. Bitcasts ; between pointer types should be treated as free, despite the scalarization.