[LV] Update remaining tests to use VPlan cost output (NFC). (#190038)
Move remaining tests checking legacy cost output to check the VPlan's cost model output. In some cases, checks become much more compact (checking a single interleave group cost vs checking the individual members which all have the group's cost). In some cases, auto-generation consistently checks all relevant VFs. PR: https://github.com/llvm/llvm-project/pull/190038
This commit is contained in:
@@ -17,12 +17,6 @@ define void @udiv_rhs_opt_cost(ptr %dst) #0 {
|
||||
; CHECK: Cost of 0 for VF vscale x 2: IR %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: Cost of 5 for VF vscale x 4: CLONE ir<%div> = udiv vp<[[VP7]]>, ir<3>
|
||||
; CHECK: Cost of 0 for VF vscale x 4: IR %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF 1 For instruction: %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF 2 For instruction: %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF vscale x 1 For instruction: %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF vscale x 2 For instruction: %div = udiv i8 %iv.trunc, 3
|
||||
; CHECK: LV: Found an estimated cost of 5 for VF vscale x 4 For instruction: %div = udiv i8 %iv.trunc, 3
|
||||
;
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
@@ -8,6 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i32 %len) #0 {
|
||||
; CHECK-COST-LABEL: LV: Checking a loop in 'zext_i8_i16'
|
||||
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32
|
||||
; CHECK-COST: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
|
||||
; CHECK-COST: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
|
||||
; CHECK-COST: Cost of 1 for VF 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
|
||||
@@ -16,7 +17,6 @@ define void @zext_i8_i16(ptr noalias nocapture readonly %p, ptr noalias nocaptur
|
||||
; CHECK-COST: Cost of 1 for VF vscale x 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
|
||||
; CHECK-COST: Cost of 1 for VF vscale x 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
|
||||
; CHECK-COST: Cost of 0 for VF vscale x 8: WIDEN-CAST ir<%conv> = zext ir<%0> to i16
|
||||
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv = zext i8 %0 to i32
|
||||
; CHECK-LABEL: define void @zext_i8_i16
|
||||
; CHECK-SAME: (ptr noalias readonly captures(none) [[P:%.*]], ptr noalias captures(none) [[Q:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
|
||||
@@ -84,22 +84,26 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
|
||||
; CHECK-SCALAR: LV(REG): VF = 1
|
||||
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
|
||||
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
|
||||
; CHECK-LMUL1: LV(REG): VF = vscale x 2
|
||||
; CHECK-LMUL1-LABEL: goo
|
||||
; CHECK-LMUL1: LV(REG): VF = vscale x 1
|
||||
; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
|
||||
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
|
||||
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
|
||||
; CHECK-LMUL2: LV(REG): VF = vscale x 4
|
||||
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
|
||||
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
|
||||
; CHECK-LMUL2-LABEL: goo
|
||||
; CHECK-LMUL2: LV(REG): VF = vscale x 2
|
||||
; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
|
||||
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
|
||||
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
|
||||
; CHECK-LMUL4: LV(REG): VF = vscale x 8
|
||||
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
|
||||
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
|
||||
; CHECK-LMUL4-LABEL: goo
|
||||
; CHECK-LMUL4: LV(REG): VF = vscale x 4
|
||||
; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
|
||||
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
|
||||
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
|
||||
; CHECK-LMUL8: LV(REG): VF = vscale x 16
|
||||
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
|
||||
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
|
||||
; CHECK-LMUL8-LABEL: goo
|
||||
; CHECK-LMUL8: LV(REG): VF = vscale x 8
|
||||
; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
|
||||
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
|
||||
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
|
||||
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
|
||||
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
|
||||
entry:
|
||||
%cmp3 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
|
||||
; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: Cost of 0 for VF vscale x 4: WIDEN-REDUCTION-PHI ir<%rdx> = phi
|
||||
; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]
|
||||
|
||||
define i32 @add(ptr %a, i64 %n, i32 %start) {
|
||||
entry:
|
||||
|
||||
@@ -11,17 +11,17 @@ define hidden i32 @i32_mac_s8(ptr nocapture noundef readonly %a, ptr nocapture n
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv2, %conv
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %0 = load i8, ptr %arrayidx, align 1
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = sext i8 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %1 = load i8, ptr %arrayidx1, align 1
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 3 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
|
||||
; CHECK: Cost of 3 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, ptr %arrayidx, align 1
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = sext i8 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, ptr %arrayidx1, align 1
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv2 = sext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
|
||||
; CHECK: LV: Selecting VF: 4.
|
||||
entry:
|
||||
%cmp7.not = icmp eq i32 %N, 0
|
||||
@@ -55,17 +55,17 @@ define hidden i32 @i32_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i32 %conv2, %conv
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = sext i16 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = sext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i16, ptr %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv = sext i16 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, ptr %arrayidx1, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv2 = sext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = sext ir<%0> to i32
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv2> = sext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
|
||||
; CHECK: LV: Selecting VF: 4.
|
||||
entry:
|
||||
%cmp7.not = icmp eq i32 %N, 0
|
||||
@@ -99,11 +99,11 @@ define hidden i64 @i64_mac_s16(ptr nocapture noundef readonly %a, ptr nocapture
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = sext i16 %1 to i64
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nsw i64 %conv2, %conv
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = sext i16 %0 to i64
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv2 = sext i16 %1 to i64
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nsw i64 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%0> to i64
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv2> = sext ir<%1> to i64
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nsw ir<%conv2>, ir<%conv>
|
||||
; CHECK: LV: Selecting VF: 2.
|
||||
entry:
|
||||
%cmp7.not = icmp eq i32 %N, 0
|
||||
@@ -136,10 +136,10 @@ define hidden i64 @i64_mac_s32(ptr nocapture noundef readonly %a, ptr nocapture
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul i32 %1, %0
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = sext i32 %mul to i64
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i32, ptr %arrayidx, align 4
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i32, ptr %arrayidx1, align 4
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul i32 %1, %0
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = sext i32 %mul to i64
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul ir<%1>, ir<%0>
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = sext ir<%mul> to i64
|
||||
; CHECK: LV: Selecting VF: 2.
|
||||
entry:
|
||||
%cmp6.not = icmp eq i32 %N, 0
|
||||
@@ -172,17 +172,17 @@ define hidden i32 @i32_mac_u8(ptr nocapture noundef readonly %a, ptr nocapture n
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i32 %conv2, %conv
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %0 = load i8, ptr %arrayidx, align 1
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = zext i8 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 2 For instruction: %1 = load i8, ptr %arrayidx1, align 1
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = zext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nuw nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 3 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
|
||||
; CHECK: Cost of 3 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i8, ptr %arrayidx, align 1
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv = zext i8 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i8, ptr %arrayidx1, align 1
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %conv2 = zext i8 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nuw nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
|
||||
; CHECK: LV: Selecting VF: 4.
|
||||
entry:
|
||||
%cmp7.not = icmp eq i32 %N, 0
|
||||
@@ -216,17 +216,17 @@ define hidden i32 @i32_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i32 %conv2, %conv
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv = zext i16 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv2 = zext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nuw nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %0 = load i16, ptr %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv = zext i16 %0 to i32
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, ptr %arrayidx1, align 2
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %conv2 = zext i16 %1 to i32
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %mul = mul nuw nsw i32 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv> = zext ir<%0> to i32
|
||||
; CHECK: Cost of 2 for VF 4: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%conv2> = zext ir<%1> to i32
|
||||
; CHECK: Cost of 1 for VF 4: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
|
||||
; CHECK: LV: Selecting VF: 4.
|
||||
entry:
|
||||
%cmp7.not = icmp eq i32 %N, 0
|
||||
@@ -260,11 +260,11 @@ define hidden i64 @i64_mac_u16(ptr nocapture noundef readonly %a, ptr nocapture
|
||||
; CHECK: LV: Found an estimated cost of 0 for VF 1 For instruction: %conv2 = zext i16 %1 to i64
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul nuw nsw i64 %conv2, %conv
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i16, ptr %arrayidx, align 2
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = zext i16 %0 to i64
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i16, ptr %arrayidx1, align 2
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv2 = zext i16 %1 to i64
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul nuw nsw i64 %conv2, %conv
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%0> to i64
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv2> = zext ir<%1> to i64
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul nuw nsw ir<%conv2>, ir<%conv>
|
||||
; CHECK: LV: Selecting VF: 2.
|
||||
entry:
|
||||
%cmp8.not = icmp eq i32 %N, 0
|
||||
@@ -297,10 +297,10 @@ define hidden i64 @i64_mac_u32(ptr nocapture noundef readonly %a, ptr nocapture
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %mul = mul i32 %1, %0
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %conv = zext i32 %mul to i64
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %0 = load i32, ptr %arrayidx, align 4
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %1 = load i32, ptr %arrayidx1, align 4
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %mul = mul i32 %1, %0
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %conv = zext i32 %mul to i64
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%0> = load
|
||||
; CHECK: Cost of 2 for VF 2: WIDEN ir<%1> = load
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN ir<%mul> = mul ir<%1>, ir<%0>
|
||||
; CHECK: Cost of 1 for VF 2: WIDEN-CAST ir<%conv> = zext ir<%mul> to i64
|
||||
; CHECK: LV: Selecting VF: 2.
|
||||
entry:
|
||||
%cmp6.not = icmp eq i32 %N, 0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9], ptr %__last" --filter "LV: Found an estimated cost of [0-9] for VF [0-9] For instruction:\s*store ptr %[0-9]" --version 5
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store ptr" --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at" --version 5
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
@@ -6,9 +6,12 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define ptr @foo(ptr %__first, ptr %__last) #0 {
|
||||
; CHECK-LABEL: 'foo'
|
||||
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store ptr %0, ptr %__last, align 8
|
||||
; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: store ptr %0, ptr %__last, align 8
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 4 For instruction: store ptr %0, ptr %__last, align 8
|
||||
; CHECK: LV: Found an estimated cost of 3 for VF 8 For instruction: store ptr %0, ptr %__last, align 8
|
||||
; CHECK: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep>
|
||||
; CHECK: ir<%0> = load from index 0
|
||||
; CHECK: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep>
|
||||
; CHECK: ir<%0> = load from index 0
|
||||
; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %0, vp<%next.gep>
|
||||
; CHECK: ir<%0> = load from index 0
|
||||
;
|
||||
entry:
|
||||
%cmp.not1 = icmp eq ptr %__first, %__last
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,65 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 92 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,68 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 210 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 51 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 210 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,35 +14,74 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 84 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 92 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,136 +14,59 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 145 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 145 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load float, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,36 +14,88 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 37 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,179 +14,61 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load float, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,187 +14,73 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 15 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: ir<%v6> = load from index 6
|
||||
; AVX2: ir<%v7> = load from index 7
|
||||
; AVX2: Cost of 30 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 60 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
|
||||
; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,67 +14,55 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,86 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,99 +14,60 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,126 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,149 +14,80 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,165 +14,67 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 35 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load double, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,171 +14,79 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; SSE2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 6 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 12 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 24 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX1: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 32 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX2: Cost of 3 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 7 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 14 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 28 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 56 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
|
||||
; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v7> = load ir<%in7>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v7> = load ir<%in7>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v7> = load ir<%in7>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v7> = load ir<%in7>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF 32: INTERLEAVE-GROUP with factor [0-9]+ at %0," --version 5
|
||||
; RUN: opt -S -passes=loop-vectorize -debug-only=loop-vectorize -mattr=avx512fp16 %s 2>&1 | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
|
||||
@@ -9,9 +9,11 @@ target triple = "i386-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @stride8(half %k, i32 %width_) {
|
||||
; CHECK-LABEL: 'stride8'
|
||||
; CHECK: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %0, ir<%arrayidx>
|
||||
;
|
||||
entry:
|
||||
|
||||
; CHECK: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %0, ir<%arrayidx>
|
||||
|
||||
%cmp72 = icmp sgt i32 %width_, 0
|
||||
br i1 %cmp72, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
@@ -97,9 +99,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @stride3(half %k, i32 %width_) {
|
||||
; CHECK-LABEL: 'stride3'
|
||||
; CHECK: Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at %0, ir<%arrayidx>
|
||||
;
|
||||
entry:
|
||||
|
||||
; CHECK: LV: Found an estimated cost of 18 for VF 32 For instruction: %0 = load half, ptr %arrayidx, align 4
|
||||
|
||||
%cmp27 = icmp sgt i32 %width_, 0
|
||||
br i1 %cmp27, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,86 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 11 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 284 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 34 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 34 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,95 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 31 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 62 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 31 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 62 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 59 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 426 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 18 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 81 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 7 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 81 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,112 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 79 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 158 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 79 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 158 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 77 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 154 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 568 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 148 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 148 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,193 +15,104 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: Cost of 25 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 45 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 355 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 710 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 28 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 55 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 235 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 55 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 235 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i16, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,146 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 112 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 224 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 41 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 109 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 218 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 852 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 81 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 342 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 13 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 17 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 81 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 342 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,265 +15,128 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 121 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 497 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 994 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 15 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 19 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 112 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 469 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 19 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 469 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i16, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,269 +15,140 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; SSE2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 33 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 66 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 568 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 1136 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 148 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
|
||||
; AVX512BW: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 616 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,50 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 13 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 50 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 1 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 2 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 13 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 50 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,65 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 22 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 92 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 144 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 36 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 144 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,45 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 23 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 11 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 23 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 78 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 21 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 78 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,68 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 44 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 20 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 210 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 51 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 210 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,38 +14,64 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 67 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 67 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 17 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 71 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 71 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,67 +14,54 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 50 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 50 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,39 +14,45 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: Cost of 33 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 29 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 1 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 1 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 29 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,35 +14,74 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 84 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 92 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,136 +14,59 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 145 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 145 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i32, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,36 +14,88 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 37 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,179 +14,61 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i32, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,187 +14,73 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 22 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 44 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 17 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: ir<%v6> = load from index 6
|
||||
; AVX2: ir<%v7> = load from index 7
|
||||
; AVX2: Cost of 34 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 68 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
|
||||
; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,67 +14,59 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 40 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; SSE2: ir<%v0> = load from index 0
|
||||
; SSE2: ir<%v1> = load from index 1
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX1: ir<%v0> = load from index 0
|
||||
; AVX1: ir<%v1> = load from index 1
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,86 +14,57 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,99 +14,72 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,116 +14,62 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 18 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,137 +14,86 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,165 +14,74 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 35 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i64, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load|WIDEN ir<%v[0-9]> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,171 +14,79 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 10 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 20 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 40 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 9 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 18 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 36 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 72 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8
|
||||
; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512: ir<%v0> = load from index 0
|
||||
; AVX512: ir<%v1> = load from index 1
|
||||
; AVX512: ir<%v2> = load from index 2
|
||||
; AVX512: ir<%v3> = load from index 3
|
||||
; AVX512: ir<%v4> = load from index 4
|
||||
; AVX512: ir<%v5> = load from index 5
|
||||
; AVX512: ir<%v6> = load from index 6
|
||||
; AVX512: ir<%v7> = load from index 7
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN ir<%v7> = load ir<%in7>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN ir<%v7> = load ir<%in7>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN ir<%v7> = load ir<%in7>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v0> = load ir<%in0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v1> = load ir<%in1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v2> = load ir<%in2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v3> = load ir<%in3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v4> = load ir<%in4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v5> = load ir<%in5>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v6> = load ir<%in6>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN ir<%v7> = load ir<%in7>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,78 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: Cost of 270 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 9 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 17 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 41 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 17 for VF 32: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: Cost of 41 for VF 64: INTERLEAVE-GROUP with factor 2 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,95 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: Cost of 17 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 6 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: Cost of 405 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 4 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 25 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: Cost of 25 for VF 64: INTERLEAVE-GROUP with factor 3 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v0 = load i8, ptr %in0"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,49 +15,112 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 26 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 13 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 25 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 58 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: Cost of 540 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 5 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 238 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 17 for VF 8: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: Cost of 238 for VF 64: INTERLEAVE-GROUP with factor 4 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,193 +15,104 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 45 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 165 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: Cost of 675 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 198 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 395 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 41 for VF 8: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 99 for VF 16: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 198 for VF 32: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: Cost of 395 for VF 64: INTERLEAVE-GROUP with factor 5 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,229 +15,146 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 46 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
; AVX2: Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX2: ir<%v0> = load from index 0
|
||||
; AVX2: ir<%v1> = load from index 1
|
||||
; AVX2: ir<%v2> = load from index 2
|
||||
; AVX2: ir<%v3> = load from index 3
|
||||
; AVX2: ir<%v4> = load from index 4
|
||||
; AVX2: ir<%v5> = load from index 5
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 45 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 85 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: Cost of 810 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 25 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 49 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 119 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 237 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 591 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 49 for VF 8: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 119 for VF 16: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 237 for VF 32: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: Cost of 591 for VF 64: INTERLEAVE-GROUP with factor 6 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,265 +15,128 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 62 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 233 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: Cost of 945 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 29 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 138 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 413 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 826 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 29 for VF 4: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 57 for VF 8: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 138 for VF 16: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 413 for VF 32: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: Cost of 826 for VF 64: INTERLEAVE-GROUP with factor 7 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%v. = load i8, ptr %in."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%v0 = load" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at %v0,|REPLICATE ir<%v0> = load)" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -15,301 +15,140 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 5 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 47 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; SSE2: Cost of 5 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 11 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 23 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; SSE2: Cost of 47 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX1: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX1: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX2: Cost of 4 for VF 2: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 8 for VF 4: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 16 for VF 8: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 32 for VF 16: REPLICATE ir<%v0> = load ir<%in0>
|
||||
; AVX2: Cost of 65 for VF 32: REPLICATE ir<%v0> = load ir<%in0>
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 0 for VF 64 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512DQ: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
; AVX512DQ: Cost of 1080 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512DQ: ir<%v0> = load from index 0
|
||||
; AVX512DQ: ir<%v1> = load from index 1
|
||||
; AVX512DQ: ir<%v2> = load from index 2
|
||||
; AVX512DQ: ir<%v3> = load from index 3
|
||||
; AVX512DQ: ir<%v4> = load from index 4
|
||||
; AVX512DQ: ir<%v5> = load from index 5
|
||||
; AVX512DQ: ir<%v6> = load from index 6
|
||||
; AVX512DQ: ir<%v7> = load from index 7
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 65 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 158 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 472 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1100 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i8, ptr %in1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i8, ptr %in2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i8, ptr %in3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i8, ptr %in4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v5 = load i8, ptr %in5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v6 = load i8, ptr %in6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 0 for VF 64 For instruction: %v7 = load i8, ptr %in7, align 1
|
||||
; AVX512BW: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 65 for VF 8: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 158 for VF 16: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 472 for VF 32: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
; AVX512BW: Cost of 1100 for VF 64: INTERLEAVE-GROUP with factor 8 at %v0, ir<%in0>
|
||||
; AVX512BW: ir<%v0> = load from index 0
|
||||
; AVX512BW: ir<%v1> = load from index 1
|
||||
; AVX512BW: ir<%v2> = load from index 2
|
||||
; AVX512BW: ir<%v3> = load from index 3
|
||||
; AVX512BW: ir<%v4> = load from index 4
|
||||
; AVX512BW: ir<%v5> = load from index 5
|
||||
; AVX512BW: ir<%v6> = load from index 6
|
||||
; AVX512BW: ir<%v7> = load from index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v1, ptr %out1"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 8 For instruction: store float %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 16 For instruction: store float %v1, ptr %out1, align 4
|
||||
; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 30 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 60 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 120 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 32 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 64 For instruction: store float %v1, ptr %out1, align 4
|
||||
; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v2, ptr %out2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 4 For instruction: store float %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4
|
||||
; SSE2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX1: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 22 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 45 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 90 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 180 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 24 for VF 32 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 48 for VF 64 For instruction: store float %v2, ptr %out2, align 4
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 48 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v3, ptr %out3"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 8 For instruction: store float %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 112 for VF 16 For instruction: store float %v3, ptr %out3, align 4
|
||||
; SSE2: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX1: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 60 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 120 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 240 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 8 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 44 for VF 32 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 88 for VF 64 For instruction: store float %v3, ptr %out3, align 4
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 88 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v4, ptr %out4"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,37 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 88 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; SSE2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX1: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 75 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 150 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 300 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 75 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 150 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 300 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 32 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 64 For instruction: store float %v4, ptr %out4, align 4
|
||||
; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 70 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 140 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v5, ptr %out5"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,37 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: store float %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 48 for VF 4 For instruction: store float %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4
|
||||
; SSE2: Cost of 21 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX1: Cost of 19 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 42 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 90 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 180 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 360 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 39 for VF 8 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 78 for VF 16 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 39 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 78 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 360 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 17 for VF 4 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 25 for VF 8 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 102 for VF 32 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 204 for VF 64 For instruction: store float %v5, ptr %out5, align 4
|
||||
; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 25 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 204 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v6, ptr %out6"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,36 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 52 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 104 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; SSE2: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 50 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 105 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 50 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 105 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 4 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store float %v6, ptr %out6, align 4
|
||||
; AVX512: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store float %v7, ptr %out7"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,36 +13,192 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; SSE2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 112 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 224 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 480 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 480 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 46 for VF 8 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 184 for VF 32 For instruction: store float %v7, ptr %out7, align 4
|
||||
; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 23 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 46 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 184 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 368 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v1, ptr %out1"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 48 for VF 16 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 112 for VF 32 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 16 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 32 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 64 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 40 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v2, ptr %out2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,37 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 40 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 192 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 192 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 24 for VF 16 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 48 for VF 32 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 96 for VF 64 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 96 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v3, ptr %out3"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,36 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 112 for VF 16 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 224 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 44 for VF 16 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 88 for VF 32 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 44 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 176 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v4, ptr %out4"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,33 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 18 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 36 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 72 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 144 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v5, ptr %out5"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,33 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 40 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 80 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 160 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: Cost of 21 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 384 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 21 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 384 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 17 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 25 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 102 for VF 16 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 204 for VF 32 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 204 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 408 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v6, ptr %out6"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,33 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v0> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 416 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v0> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 104 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 208 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 416 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v0> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 560 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v0> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store double %v., ptr %out."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store double %v\., ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (WIDEN store|REPLICATE store ir<%v\.>)"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,164 +13,41 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v7, ptr %out7, align 8
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v7, ptr %out7, align 8
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out0>, ir<%v0>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out7>, ir<%v7>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out0>, ir<%v0>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out7>, ir<%v7>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out0>, ir<%v0>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out7>, ir<%v7>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out0>, ir<%v0>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out7>, ir<%v7>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v1, ptr %out1"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,92 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; SSE2: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 68 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 34 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 6 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 12 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512DQ: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: Cost of 284 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 3 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 7 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 14 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2
|
||||
; AVX512BW: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: Cost of 7 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: Cost of 14 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v2, ptr %out2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,118 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 26 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 51 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; SSE2: LV: Found an estimated cost of 102 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; SSE2: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 26 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX1: Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 29 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 52 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 105 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 210 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512DQ: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: Cost of 15 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: Cost of 29 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: Cost of 57 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: Cost of 426 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 6 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 6 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 12 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 18 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 36 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2
|
||||
; AVX512BW: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: Cost of 18 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: Cost of 36 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v3, ptr %out3"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,144 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; SSE2: LV: Found an estimated cost of 136 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; SSE2: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 34 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 136 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX1: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 34 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 68 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 72 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 3 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512DQ: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: Cost of 34 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: Cost of 68 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: Cost of 568 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 17 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 34 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 68 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2
|
||||
; AVX512BW: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: Cost of 34 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: Cost of 68 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v4, ptr %out4"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,170 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 43 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; SSE2: LV: Found an estimated cost of 170 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; SSE2: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 43 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 85 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 170 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX1: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 350 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 175 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 350 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 176 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512DQ: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: Cost of 47 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: Cost of 86 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: Cost of 355 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: Cost of 710 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 22 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 33 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 55 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 110 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2
|
||||
; AVX512BW: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: Cost of 33 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: Cost of 55 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: Cost of 110 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v5, ptr %out5"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,196 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 51 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; SSE2: LV: Found an estimated cost of 204 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; SSE2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 51 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 102 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 204 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX1: Cost of 29 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 102 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 210 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 420 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: LV: Found an estimated cost of 102 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX2: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512DQ: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: Cost of 23 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: Cost of 61 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: Cost of 852 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 40 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 81 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 162 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2
|
||||
; AVX512BW: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: Cost of 13 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: Cost of 27 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: Cost of 81 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: Cost of 162 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v6, ptr %out6"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,222 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; SSE2: LV: Found an estimated cost of 238 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; SSE2: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 238 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX1: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 490 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX2: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 245 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 490 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 246 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512DQ: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: Cost of 65 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: Cost of 122 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: Cost of 246 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: Cost of 497 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: Cost of 994 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 16 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 32 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 112 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 224 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2
|
||||
; AVX512BW: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: Cost of 112 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: Cost of 224 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %v7, ptr %out7"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,248 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; SSE2: LV: Found an estimated cost of 272 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; SSE2: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 272 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX1: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 560 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX2: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 560 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: Cost of 68 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: Cost of 136 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: Cost of 280 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: Cost of 568 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: Cost of 1136 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 18 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 18 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 37 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 74 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 148 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: LV: Found an estimated cost of 296 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2
|
||||
; AVX512BW: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: Cost of 37 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: Cost of 74 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: Cost of 148 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: Cost of 296 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v1, ptr %out1"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; SSE2: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 30 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 60 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 120 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 18 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 38 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 76 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 152 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 64 For instruction: store i32 %v1, ptr %out1, align 4
|
||||
; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 10 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 20 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v2, ptr %out2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 23 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 96 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; SSE2: LV: Found an estimated cost of 192 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; SSE2: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 48 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 96 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 192 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX1: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 57 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 114 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 228 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: LV: Found an estimated cost of 60 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX2: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 28 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 60 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 24 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: LV: Found an estimated cost of 48 for VF 64 For instruction: store i32 %v2, ptr %out2, align 4
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 8 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 24 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 48 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v3, ptr %out3"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; SSE2: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; SSE2: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 120 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 240 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX1: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 36 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 76 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 152 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 304 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 20 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: LV: Found an estimated cost of 80 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 20 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 40 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 80 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 44 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: LV: Found an estimated cost of 88 for VF 64 For instruction: store i32 %v3, ptr %out3, align 4
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 22 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 44 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 88 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v4, ptr %out4"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,37 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; SSE2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; SSE2: Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 84 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 168 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 336 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 95 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 190 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 380 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 95 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 190 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 380 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 7 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 32 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 64 For instruction: store i32 %v4, ptr %out4, align 4
|
||||
; AVX512: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 21 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 35 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 70 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 140 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v5, ptr %out5"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,37 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 45 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; SSE2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; SSE2: Cost of 45 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 96 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 192 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 384 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX1: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 54 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 114 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 228 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 456 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 15 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 39 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX2: LV: Found an estimated cost of 78 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 15 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 39 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 78 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 456 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 17 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 25 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 102 for VF 32 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: LV: Found an estimated cost of 204 for VF 64 For instruction: store i32 %v5, ptr %out5, align 4
|
||||
; AVX512: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 17 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 25 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 51 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 102 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 204 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v6, ptr %out6"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,36 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 51 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 108 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; SSE2: LV: Found an estimated cost of 216 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; SSE2: Cost of 51 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 108 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 216 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 432 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX1: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 133 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 266 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 532 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX2: Cost of 35 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 64 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 133 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 266 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 532 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store i32 %v6, ptr %out6, align 4
|
||||
; AVX512: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i32 %v7, ptr %out7"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,36 +13,192 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 120 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; SSE2: LV: Found an estimated cost of 240 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; SSE2: Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 120 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 240 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 480 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX1: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 72 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 608 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX2: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 72 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 608 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v7, ptr %out7, align 4
|
||||
; AVX512: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 23 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 46 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 92 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 184 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 368 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v1, ptr %out1"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,40 +13,72 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 112 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 22 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 44 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 88 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 176 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 6 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 2 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 64 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: Cost of 40 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v2, ptr %out2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,37 +13,92 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 66 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 264 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 36 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: Cost of 264 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 4 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 8 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 12 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 24 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 48 for VF 32 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 96 for VF 64 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 24 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 48 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: Cost of 96 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v3, ptr %out3"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,36 +13,112 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 112 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 56 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 112 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 224 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 44 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 88 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 176 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 28 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 56 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: Cost of 352 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 22 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 44 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 88 for VF 32 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 22 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 44 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 88 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: Cost of 176 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v4, ptr %out4"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,33 +13,132 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: Cost of 38 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 76 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 152 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 304 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 55 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 110 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 220 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 440 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 55 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 110 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 220 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 440 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 35 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 32 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 35 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 70 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 140 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: Cost of 280 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v5, ptr %out5"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,33 +13,152 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: Cost of 44 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: Cost of 30 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 528 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 21 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 42 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 21 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 42 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: Cost of 528 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 25 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 51 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 102 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 204 for VF 32 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 25 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 51 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 102 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 204 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: Cost of 408 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v6, ptr %out6"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,33 +13,172 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 50 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 100 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: Cost of 50 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 100 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 200 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 400 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 77 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 154 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 308 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 616 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: Cost of 36 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 77 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 154 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 308 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 616 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 40 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 70 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: Cost of 20 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 40 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 70 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 140 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 280 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: Cost of 560 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i64 %v., ptr %out."
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i64 %v\., ptr %out" --filter "Cost of [0-9]+ for VF [0-9]+: (INTERLEAVE-GROUP with factor [0-9]+ at <badref>,|WIDEN store|REPLICATE store ir<%v\.>)" --filter "^ store ir<%v.?> to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -13,164 +13,188 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; SSE2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; SSE2: LV: Found an estimated cost of 112 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; SSE2: Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 112 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 224 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 448 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX1: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX1: Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 704 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX2: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX2: Cost of 40 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 88 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 176 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 352 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 704 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512-LABEL: 'test'
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 23 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
|
||||
; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
|
||||
; AVX512: Cost of 23 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512: store ir<%v> to index 0
|
||||
; AVX512: store ir<%v1> to index 1
|
||||
; AVX512: store ir<%v2> to index 2
|
||||
; AVX512: store ir<%v3> to index 3
|
||||
; AVX512: store ir<%v4> to index 4
|
||||
; AVX512: store ir<%v5> to index 5
|
||||
; AVX512: store ir<%v6> to index 6
|
||||
; AVX512: store ir<%v7> to index 7
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out0>, ir<%v>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 10 for VF 8: WIDEN store ir<%out7>, ir<%v7>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out0>, ir<%v>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 20 for VF 16: WIDEN store ir<%out7>, ir<%v7>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out0>, ir<%v>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 40 for VF 32: WIDEN store ir<%out7>, ir<%v7>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out0>, ir<%v>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out1>, ir<%v1>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out2>, ir<%v2>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out3>, ir<%v3>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out4>, ir<%v4>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out5>, ir<%v5>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out6>, ir<%v6>
|
||||
; AVX512: Cost of 80 for VF 64: WIDEN store ir<%out7>, ir<%v7>
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v1, ptr %out1"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,106 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; SSE2: LV: Found an estimated cost of 126 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; SSE2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: Cost of 126 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX1: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 66 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: Cost of 134 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: LV: Found an estimated cost of 6 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX2: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 6 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512DQ: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 270 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 4 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 4 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 20 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 41 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1
|
||||
; AVX512BW: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 4 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 20 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 41 for VF 64: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v2, ptr %out2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,132 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 52 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; SSE2: LV: Found an estimated cost of 204 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; SSE2: Cost of 26 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 52 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 101 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: Cost of 204 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX1: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 27 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 53 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 100 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: Cost of 201 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX2: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 7 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512DQ: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 15 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 405 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 8 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 16 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 29 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1
|
||||
; AVX512BW: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 29 for VF 64: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v3, ptr %out3"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,158 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 124 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; SSE2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; SSE2: Cost of 28 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 60 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 124 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: Cost of 252 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX1: Cost of 17 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 33 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 66 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 132 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: Cost of 268 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX2: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 10 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512DQ: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 14 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 540 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 12 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 28 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1
|
||||
; AVX512BW: Cost of 11 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 12 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 28 for VF 64: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v4, ptr %out4"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,180 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 87 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 178 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; SSE2: LV: Found an estimated cost of 360 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; SSE2: Cost of 44 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 87 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 178 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: Cost of 360 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX1: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX2: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 84 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: Cost of 335 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 24 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 336 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512DQ: Cost of 24 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 87 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 166 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 336 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 675 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 15 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 31 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 79 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 158 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 237 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 395 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1
|
||||
; AVX512BW: Cost of 15 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 31 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 79 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 158 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 237 for VF 32: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 395 for VF 64: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v5, ptr %out5"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,210 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 49 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 98 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 201 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; SSE2: LV: Found an estimated cost of 408 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; SSE2: Cost of 49 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 98 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 201 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: Cost of 408 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX1: Cost of 27 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 53 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 100 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 198 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: Cost of 402 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 12 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: LV: Found an estimated cost of 96 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX2: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 18 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 30 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: ir<%v> = load from index 0
|
||||
; AVX2: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 12 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512DQ: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 19 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 29 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 93 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 810 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 38 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 98 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 197 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 295 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 591 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1
|
||||
; AVX512BW: Cost of 18 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 38 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 98 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 197 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 295 for VF 32: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 591 for VF 64: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v6, ptr %out6"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,231 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 57 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 112 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 225 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; SSE2: LV: Found an estimated cost of 456 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; SSE2: Cost of 57 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 112 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 225 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: Cost of 456 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX1: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 232 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX2: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 119 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 232 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: Cost of 469 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 470 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512DQ: Cost of 34 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: Cost of 63 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 121 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 234 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 470 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 945 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 22 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 118 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 236 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 472 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 826 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1
|
||||
; AVX512BW: Cost of 22 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 46 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 118 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 236 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 472 for VF 32: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 826 for VF 64: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i8 %v7, ptr %out7"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost of [0-9]+ for VF [0-9]+: INTERLEAVE-GROUP with factor [0-9]+ at .*, ir<%out" --filter "^ store ir<.* to index"
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+sse2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX1
|
||||
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -mattr=+avx2 --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=AVX2
|
||||
@@ -14,50 +14,258 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test() {
|
||||
; SSE2-LABEL: 'test'
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 56 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 120 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 248 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; SSE2: LV: Found an estimated cost of 504 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; SSE2: Cost of 56 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 120 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 248 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
; SSE2: Cost of 504 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; SSE2: store ir<%v> to index 0
|
||||
; SSE2: store ir<%v1> to index 1
|
||||
; SSE2: store ir<%v2> to index 2
|
||||
; SSE2: store ir<%v3> to index 3
|
||||
; SSE2: store ir<%v4> to index 4
|
||||
; SSE2: store ir<%v5> to index 5
|
||||
; SSE2: store ir<%v6> to index 6
|
||||
; SSE2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX1-LABEL: 'test'
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX1: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
; AVX1: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX1: store ir<%v> to index 0
|
||||
; AVX1: store ir<%v1> to index 1
|
||||
; AVX1: store ir<%v2> to index 2
|
||||
; AVX1: store ir<%v3> to index 3
|
||||
; AVX1: store ir<%v4> to index 4
|
||||
; AVX1: store ir<%v5> to index 5
|
||||
; AVX1: store ir<%v6> to index 6
|
||||
; AVX1: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX2-LABEL: 'test'
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX2: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
; AVX2: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX2: store ir<%v> to index 0
|
||||
; AVX2: store ir<%v1> to index 1
|
||||
; AVX2: store ir<%v2> to index 2
|
||||
; AVX2: store ir<%v3> to index 3
|
||||
; AVX2: store ir<%v4> to index 4
|
||||
; AVX2: store ir<%v5> to index 5
|
||||
; AVX2: store ir<%v6> to index 6
|
||||
; AVX2: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512DQ-LABEL: 'test'
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512DQ: Cost of 33 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 66 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 132 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 264 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 536 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
; AVX512DQ: ir<%v> = load from index 0
|
||||
; AVX512DQ: Cost of 1080 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512DQ: store ir<%v> to index 0
|
||||
; AVX512DQ: store ir<%v1> to index 1
|
||||
; AVX512DQ: store ir<%v2> to index 2
|
||||
; AVX512DQ: store ir<%v3> to index 3
|
||||
; AVX512DQ: store ir<%v4> to index 4
|
||||
; AVX512DQ: store ir<%v5> to index 5
|
||||
; AVX512DQ: store ir<%v6> to index 6
|
||||
; AVX512DQ: store ir<%v7> to index 7
|
||||
;
|
||||
; AVX512BW-LABEL: 'test'
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 25 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 137 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 275 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 550 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: LV: Found an estimated cost of 1100 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1
|
||||
; AVX512BW: Cost of 25 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 53 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 137 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 275 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 550 for VF 32: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
; AVX512BW: ir<%v> = load from index 0
|
||||
; AVX512BW: Cost of 1100 for VF 64: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%out0>
|
||||
; AVX512BW: store ir<%v> to index 0
|
||||
; AVX512BW: store ir<%v1> to index 1
|
||||
; AVX512BW: store ir<%v2> to index 2
|
||||
; AVX512BW: store ir<%v3> to index 3
|
||||
; AVX512BW: store ir<%v4> to index 4
|
||||
; AVX512BW: store ir<%v5> to index 5
|
||||
; AVX512BW: store ir<%v6> to index 6
|
||||
; AVX512BW: store ir<%v7> to index 7
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*%i[2,4] = load i16, ptr %[a-zA-Z0-7]+, align 2" --filter "Cost of [0-9]+ for VF [0-9]+: (REPLICATE ir<%i[24]> = load|INTERLEAVE-GROUP with factor [0-9]+ at %i[24])" --filter "^ ir<.* = load from index"
|
||||
; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED
|
||||
; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED
|
||||
; REQUIRES: asserts
|
||||
@@ -22,26 +22,30 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly
|
||||
; DISABLED_MASKED_STRIDED-LABEL: 'test1'
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE ir<%i2> = load ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE ir<%i2> = load ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE ir<%i2> = load ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE ir<%i2> = load ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx7>
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test1'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
@@ -79,26 +83,30 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
|
||||
; DISABLED_MASKED_STRIDED-LABEL: 'test2'
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE ir<%i2> = load ir<%arrayidx2> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx7> (S->V)
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test2'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8:%[0-9]+]]>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 11 for VF 4: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 17 for VF 16: INTERLEAVE-GROUP with factor 4 at %i2, ir<%arrayidx2>, vp<[[VP8]]>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i2> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 1
|
||||
;
|
||||
entry:
|
||||
%cmp15 = icmp sgt i32 %numPoints, 0
|
||||
@@ -146,26 +154,22 @@ define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly
|
||||
; DISABLED_MASKED_STRIDED-LABEL: 'test'
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE ir<%i4> = load ir<%arrayidx6> (S->V)
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 2 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 4 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 4 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 8 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 9 for VF 8 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 16 For instruction: %i2 = load i16, ptr %arrayidx, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 16 For instruction: %i4 = load i16, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0
|
||||
; ENABLED_MASKED_STRIDED: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 3 at %i4, ir<%arrayidx6>, ir<%cmp1>
|
||||
; ENABLED_MASKED_STRIDED: ir<%i4> = load from index 0
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF [0-9]+ For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2"
|
||||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "LV: Found an estimated cost of [0-9]+ for VF 1 For instruction:\s*store i16 %[0,2], ptr %[a-zA-Z0-7]+, align 2" --filter "Cost of [1-9][0-9]* for VF [0-9]+: (profitable to scalarize\s+store i16 %[02]|REPLICATE store ir<%[02]>|INTERLEAVE-GROUP with factor [0-9]+ at <badref>)"
|
||||
; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED
|
||||
; RUN: opt -passes=loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize --disable-output < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED
|
||||
; REQUIRES: asserts
|
||||
@@ -22,30 +22,29 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly
|
||||
; DISABLED_MASKED_STRIDED-LABEL: 'test1'
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 13 for VF 4: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 27 for VF 8: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 55 for VF 16: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test1'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: Cost of 6 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; ENABLED_MASKED_STRIDED: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 27 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
@@ -83,30 +82,30 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
|
||||
; DISABLED_MASKED_STRIDED-LABEL: 'test2'
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 17 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 71 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 8 for VF 2: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 2: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 17 for VF 4: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 4: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 35 for VF 8: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 8: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 71 for VF 16: REPLICATE store ir<%0>, ir<%arrayidx2>
|
||||
; DISABLED_MASKED_STRIDED: Cost of 3000000 for VF 16: REPLICATE store ir<%2>, ir<%arrayidx7>
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test2'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 8 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, ptr %arrayidx2, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 16 For instruction: store i16 %2, ptr %arrayidx7, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 13 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8:%[0-9]+]]>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 14 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8]]>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 14 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8]]>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
; ENABLED_MASKED_STRIDED: Cost of 27 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%arrayidx2>, vp<[[VP8]]>
|
||||
; ENABLED_MASKED_STRIDED: store ir<%0> to index 0
|
||||
; ENABLED_MASKED_STRIDED: store ir<%2> to index 1
|
||||
;
|
||||
entry:
|
||||
%cmp15 = icmp sgt i32 %numPoints, 0
|
||||
@@ -153,19 +152,17 @@ for.end:
|
||||
define void @test(ptr noalias nocapture %points, ptr noalias nocapture readonly %x, ptr noalias nocapture readnone %y) {
|
||||
; DISABLED_MASKED_STRIDED-LABEL: 'test'
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 2 for VF 2: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 4 for VF 4: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 8 for VF 8: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
; DISABLED_MASKED_STRIDED: Cost of 16 for VF 16: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
;
|
||||
; ENABLED_MASKED_STRIDED-LABEL: 'test'
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 2 for VF 2: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 4 for VF 4: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 8 for VF 8: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
; ENABLED_MASKED_STRIDED: Cost of 16 for VF 16: profitable to scalarize store i16 %0, ptr %arrayidx6, align 2
|
||||
;
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user