Files
llvm-project/llvm/test/Transforms/LoopVectorize/select-cmp.ll
Florian Hahn 5995fe951f [VPlan] Normalize selects to always select the data op when cond is true.
Fix a miscompile in the FindLast handling by normalizing selects
with the phi node as the first op to ones that select the data value
when the condition is true, by swapping operands and inverting the
condition.

This should ensure correct codegen for both cases.

Select normalization:
https://alive2.llvm.org/ce/z/yFdivK

Fixes a miscompile reported for 2abd6d6d7a (#158088).
2026-01-17 18:30:52 +00:00

1355 lines
93 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC1
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC4
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF1IC4
define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp(
; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 7, i32 3
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp(
; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]]
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 7, i32 3
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp(
; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], 3
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP9]], 3
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP10]], 3
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP11]], 3
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 7, i32 3
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ 3, %entry ], [ %sel, %loop ]
%gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv
%load.v.iv = load i32, ptr %gep.v.iv, align 4
%cmp.v.iv.3 = icmp eq i32 %load.v.iv, 3
%sel = select i1 %cmp.v.iv.3, i32 %rdx, i32 7
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp2(
; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI]], [[TMP3]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP6]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]]
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp2(
; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI]], [[TMP6]]
; CHECK-VF4IC4-NEXT: [[TMP11]] = or <4 x i1> [[VEC_PHI1]], [[TMP7]]
; CHECK-VF4IC4-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI2]], [[TMP8]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI3]], [[TMP9]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP11]], [[TMP10]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP12]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP13]], [[BIN_RDX7]]
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]])
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = freeze i1 [[TMP15]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP16]], i32 7, i32 3
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]]
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp2(
; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
; CHECK-VF1IC4-NEXT: [[TMP16]] = or i1 [[VEC_PHI]], [[TMP12]]
; CHECK-VF1IC4-NEXT: [[TMP17]] = or i1 [[VEC_PHI1]], [[TMP13]]
; CHECK-VF1IC4-NEXT: [[TMP18]] = or i1 [[VEC_PHI2]], [[TMP14]]
; CHECK-VF1IC4-NEXT: [[TMP19]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP17]], [[TMP16]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP19]], [[BIN_RDX4]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = freeze i1 [[BIN_RDX5]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP21]], i32 7, i32 3
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]]
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ 3, %entry ], [ %sel, %loop ]
%gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv
%load.v.iv = load i32, ptr %gep.v.iv, align 4
%cmp.v.iv.3 = icmp eq i32 %load.v.iv, 3
%sel = select i1 %cmp.v.iv.3, i32 7, i32 %rdx
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_i32_from_icmp(
; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 [[B]], i32 [[A]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]]
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_i32_from_icmp(
; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]]
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[B]], i32 [[A]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]]
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_i32_from_icmp(
; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], 3
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP9]], 3
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP10]], 3
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP11]], 3
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 [[B]], i32 [[A]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]]
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ %a, %entry ], [ %sel, %loop ]
%gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv
%load.v.iv = load i32, ptr %gep.v.iv, align 4
%cmp.load.iv.3 = icmp eq i32 %load.v.iv, 3
%sel = select i1 %cmp.load.iv.3, i32 %rdx, i32 %b
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp_fast(
; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 2
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast(
; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]]
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 1, i32 2
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast(
; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast one float [[TMP8]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast one float [[TMP9]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast one float [[TMP10]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast one float [[TMP11]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
; CHECK-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ 2, %entry ], [ %sel, %loop ]
%gep.v.iv = getelementptr inbounds float, ptr %v, i64 %iv
%load.v.iv = load float, ptr %gep.v.iv, align 4
%cmp.load.iv.3 = fcmp fast ueq float %load.v.iv, 3.0
%sel = select i1 %cmp.load.iv.3, i32 %rdx, i32 1
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp(
; CHECK-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = fcmp one <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 2
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp(
; CHECK-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = fcmp one <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = fcmp one <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = fcmp one <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = fcmp one <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI]], [[TMP10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI2]], [[TMP12]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI3]], [[TMP13]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX7]]
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX8]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 1, i32 2
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
; CHECK-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp(
; CHECK-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp one float [[TMP8]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp one float [[TMP9]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp one float [[TMP10]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp one float [[TMP11]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP12]]
; CHECK-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP13]]
; CHECK-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP14]]
; CHECK-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP15]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
; CHECK-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ 2, %entry ], [ %sel, %loop ]
%gep.v.iv = getelementptr inbounds float, ptr %v, i64 %iv
%load.v.iv = load float, ptr %gep.v.iv, align 4
%cmp.v.iv.3 = fcmp ueq float %load.v.iv, 3.0
%sel = select i1 %cmp.v.iv.3, i32 %rdx, i32 1
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_i32_from_icmp_same_inputs(
; CHECK-VF4IC1-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3)
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i32 [[B]], i32 [[A]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]]
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs(
; CHECK-VF4IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3)
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[TMP6]] = or <4 x i1> [[VEC_PHI1]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[TMP7]] = or <4 x i1> [[VEC_PHI2]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[TMP8]] = or <4 x i1> [[VEC_PHI3]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX4:%.*]] = or <4 x i1> [[TMP7]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP8]], [[BIN_RDX4]]
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]])
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[B]], i32 [[A]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]]
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs(
; CHECK-VF1IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = icmp ne i32 [[A]], 3
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI]], [[TMP0]]
; CHECK-VF1IC4-NEXT: [[TMP6]] = or i1 [[VEC_PHI1]], [[TMP0]]
; CHECK-VF1IC4-NEXT: [[TMP7]] = or i1 [[VEC_PHI2]], [[TMP0]]
; CHECK-VF1IC4-NEXT: [[TMP8]] = or i1 [[VEC_PHI3]], [[TMP0]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP6]], [[TMP5]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP7]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP8]], [[BIN_RDX4]]
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = freeze i1 [[BIN_RDX5]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP10]], i32 [[B]], i32 [[A]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]]
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ %a, %entry ], [ %sel, %loop ]
%cmp.rdx.3 = icmp eq i32 %rdx, 3
%sel = select i1 %cmp.rdx.3, i32 %rdx, i32 %b
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
;; Negative tests
; We don't support FP reduction variables at the moment.
define float @select_const_f32_from_icmp(ptr %v, i64 %n) {
; CHECK-LABEL: define float @select_const_f32_from_icmp(
; CHECK-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi fast float [ 3.000000e+00, %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
; CHECK-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
; CHECK-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
; CHECK-NEXT: [[SEL]] = select fast i1 [[CMP_V_IV_3]], float [[RDX]], float 7.000000e+00
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi float [ [[SEL]], %[[LOOP]] ]
; CHECK-NEXT: ret float [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi fast float [ 3.0, %entry ], [ %sel, %loop ]
%gep.v.iv = getelementptr inbounds i32, ptr %v, i64 %iv
%load.v.iv = load i32, ptr %gep.v.iv, align 4
%cmp.v.iv.3 = icmp eq i32 %load.v.iv, 3
%sel = select fast i1 %cmp.v.iv.3, float %rdx, float 7.0
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret float %sel
}
define i32 @select_variant_i32_from_icmp(ptr %v1, ptr %v2, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_variant_i32_from_icmp(
; CHECK-VF4IC1-SAME: ptr [[V1:%.*]], ptr [[V2:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 3), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[V1]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V2]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3)
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-VF4IC1-NEXT: [[TMP5]] = select i1 [[TMP4]], <4 x i1> [[TMP2]], <4 x i1> [[LAST_ACTIVE_MASK]]
; CHECK-VF4IC1-NEXT: [[TMP6]] = select i1 [[TMP4]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP6]], <4 x i1> [[TMP5]], i32 3)
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC1: [[LOOP]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC1-NEXT: [[GEP_V1_IV:%.*]] = getelementptr inbounds i32, ptr [[V1]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V1_IV:%.*]] = load i32, ptr [[GEP_V1_IV]], align 4
; CHECK-VF4IC1-NEXT: [[GEP_V2_IV:%.*]] = getelementptr inbounds i32, ptr [[V2]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[LOAD_V2_IV:%.*]] = load i32, ptr [[GEP_V2_IV]], align 4
; CHECK-VF4IC1-NEXT: [[CMP_V1_IV_3:%.*]] = icmp eq i32 [[LOAD_V1_IV]], 3
; CHECK-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V1_IV_3]], i32 [[RDX]], i32 [[LOAD_V2_IV]]
; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i32 @select_variant_i32_from_icmp(
; CHECK-VF4IC4-SAME: ptr [[V1:%.*]], ptr [[V2:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 3), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[V1]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V2]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD9]], splat (i32 3)
; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP11]]
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP17]])
; CHECK-VF4IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], <4 x i1> [[TMP11]], <4 x i1> [[LAST_ACTIVE_MASK]]
; CHECK-VF4IC4-NEXT: [[TMP27]] = select i1 [[TMP19]], <4 x i32> [[WIDE_LOAD13]], <4 x i32> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC4-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP27]], <4 x i1> [[TMP23]], i32 3)
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF4IC4: [[LOOP]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF4IC4-NEXT: [[GEP_V1_IV:%.*]] = getelementptr inbounds i32, ptr [[V1]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V1_IV:%.*]] = load i32, ptr [[GEP_V1_IV]], align 4
; CHECK-VF4IC4-NEXT: [[GEP_V2_IV:%.*]] = getelementptr inbounds i32, ptr [[V2]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[LOAD_V2_IV:%.*]] = load i32, ptr [[GEP_V2_IV]], align 4
; CHECK-VF4IC4-NEXT: [[CMP_V1_IV_3:%.*]] = icmp eq i32 [[LOAD_V1_IV]], 3
; CHECK-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V1_IV_3]], i32 [[RDX]], i32 [[LOAD_V2_IV]]
; CHECK-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i32 @select_variant_i32_from_icmp(
; CHECK-VF1IC4-SAME: ptr [[V1:%.*]], ptr [[V2:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: br label %[[LOOP:.*]]
; CHECK-VF1IC4: [[LOOP]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 3, %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: [[GEP_V1_IV:%.*]] = getelementptr inbounds i32, ptr [[V1]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V1_IV:%.*]] = load i32, ptr [[GEP_V1_IV]], align 4
; CHECK-VF1IC4-NEXT: [[GEP_V2_IV:%.*]] = getelementptr inbounds i32, ptr [[V2]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[LOAD_V2_IV:%.*]] = load i32, ptr [[GEP_V2_IV]], align 4
; CHECK-VF1IC4-NEXT: [[CMP_V1_IV_3:%.*]] = icmp eq i32 [[LOAD_V1_IV]], 3
; CHECK-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V1_IV_3]], i32 [[RDX]], i32 [[LOAD_V2_IV]]
; CHECK-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ 3, %entry ], [ %sel, %loop ]
%gep.v1.iv = getelementptr inbounds i32, ptr %v1, i64 %iv
%load.v1.iv = load i32, ptr %gep.v1.iv, align 4
%gep.v2.iv = getelementptr inbounds i32, ptr %v2, i64 %iv
%load.v2.iv = load i32, ptr %gep.v2.iv, align 4
%cmp.v1.iv.3 = icmp eq i32 %load.v1.iv, 3
%sel = select i1 %cmp.v1.iv.3, i32 %rdx, i32 %load.v2.iv
%iv.next = add nuw nsw i64 %iv, 1
%exit.cond = icmp eq i64 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
; We only support selects where the input comes from the same PHI as the
; reduction PHI. In the example below, the select uses the induction
; variable input and the icmp uses the reduction PHI.
define i32 @select_i32_from_icmp_non_redux_phi(i32 %a, i32 %b, i32 %n) {
; CHECK-LABEL: define i32 @select_i32_from_icmp_non_redux_phi(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[A]], %[[ENTRY]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
; CHECK-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[IV]], i32 [[B]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ]
; CHECK-NEXT: ret i32 [[SEL_LCSSA]]
;
entry:
br label %loop
loop: ; preds = %entry, %loop
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ %a, %entry ], [ %sel, %loop ]
%cmp.rdx.3 = icmp eq i32 %rdx, 3
%sel = select i1 %cmp.rdx.3, i32 %iv, i32 %b
%iv.next = add nuw nsw i32 %iv, 1
%exit.cond = icmp eq i32 %iv.next, %n
br i1 %exit.cond, label %exit, label %loop
exit: ; preds = %loop
ret i32 %sel
}
;.
; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; CHECK-VF4IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK-VF4IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
; CHECK-VF4IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; CHECK-VF4IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; CHECK-VF4IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; CHECK-VF4IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
; CHECK-VF4IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; CHECK-VF4IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
;.
; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
; CHECK-VF4IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK-VF4IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
; CHECK-VF4IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; CHECK-VF4IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
; CHECK-VF4IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; CHECK-VF4IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
; CHECK-VF4IC4: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; CHECK-VF4IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
;.
; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
; CHECK-VF1IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; CHECK-VF1IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
; CHECK-VF1IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; CHECK-VF1IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
; CHECK-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; CHECK-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
;.