Files
David Sherwood 1d9775f684 [LV] Change VPLane::getAsRuntimeExpr to use constant 64-bit indices (#193206)
The canonical form preferred by instcombine is to use 64-bit values for
the index when it is a constant. We should try to do the same where
possible in the loop vectoriser as this reduces churn in the compiler.

It also makes other work easier, such as removing extra unnecessary
passes on the RUN line in the test directory which I plan to do
afterwards.
2026-04-22 11:33:42 +01:00

2825 lines
215 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4
define i64 @select_icmp_const_1(ptr %a, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp_const_1(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 3
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP7]], 3
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp_const_1(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP10]], <4 x i64> [[TMP11]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP12]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 3
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP16]], 3
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp_const_1(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i64 [[TMP8]], 3
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP9]], 3
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP10]], 3
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP11]], 3
; CHECK-VF1IC4-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[INDEX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP16]], i64 [[TMP17]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 3
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP21]], 3
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ 3, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8
%cmp2 = icmp eq i64 %0, 3
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_icmp_const_2(ptr %a, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp_const_2(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 3
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP7]], 3
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[RDX]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp_const_2(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_IND]]
; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i64> [[VEC_PHI1]], <4 x i64> [[STEP_ADD]]
; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[VEC_PHI2]], <4 x i64> [[STEP_ADD_2]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_PHI3]], <4 x i64> [[STEP_ADD_3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP10]], <4 x i64> [[TMP11]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP12]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 3
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP16]], 3
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[RDX]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp_const_2(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i64 [[TMP8]], 3
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP9]], 3
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP10]], 3
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP11]], 3
; CHECK-VF1IC4-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[VEC_PHI]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[VEC_PHI1]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[VEC_PHI2]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i64 [[VEC_PHI3]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP16]], i64 [[TMP17]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 3
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP21]], 3
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[RDX]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ 3, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8
%cmp2 = icmp eq i64 %0, 3
%cond = select i1 %cmp2, i64 %rdx, i64 %iv
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp_const_3_variable_rdx_start(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP7]], 3
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp_const_3_variable_rdx_start(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP10]], <4 x i64> [[TMP11]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP12]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP16]], 3
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp_const_3_variable_rdx_start(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i64 [[TMP8]], 3
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP9]], 3
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP10]], 3
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP11]], 3
; CHECK-VF1IC4-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[INDEX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP16]], i64 [[TMP17]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TMP21]], 3
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8
%cmp2 = icmp eq i64 %0, 3
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_fcmp_const_fast(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 2
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = fcmp fast ueq float [[TMP7]], 3.000000e+00
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_fcmp_const_fast(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP10]], <4 x i64> [[TMP11]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP12]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 2
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = fcmp fast ueq float [[TMP16]], 3.000000e+00
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_fcmp_const_fast(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast ueq float [[TMP8]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast ueq float [[TMP9]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast ueq float [[TMP11]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[INDEX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP16]], i64 [[TMP17]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 2
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = fcmp fast ueq float [[TMP21]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ 2, %entry ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp2 = fcmp fast ueq float %0, 3.0
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_fcmp_const(ptr %a, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_fcmp_const(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 2
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = fcmp ueq float [[TMP7]], 3.000000e+00
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_fcmp_const(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP10]], <4 x i64> [[TMP11]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP12]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 2
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = fcmp ueq float [[TMP16]], 3.000000e+00
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_fcmp_const(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp ueq float [[TMP8]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp ueq float [[TMP9]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp ueq float [[TMP10]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP11]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[INDEX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP16]], i64 [[TMP17]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 2
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = fcmp ueq float [[TMP21]], 3.000000e+00
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ 2, %entry ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp2 = fcmp ueq float %0, 3.0
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP9]], [[TMP10]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD7]]
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], [[WIDE_LOAD9]]
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], [[WIDE_LOAD10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP15]], <4 x i64> [[TMP16]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP17]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP21]], [[TMP22]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP8]], [[TMP16]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = icmp sgt i64 [[TMP9]], [[TMP17]]
; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = icmp sgt i64 [[TMP10]], [[TMP18]]
; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp sgt i64 [[TMP11]], [[TMP19]]
; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i64 [[INDEX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP25]] = select i1 [[TMP21]], i64 [[TMP1]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP26]] = select i1 [[TMP22]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP24]], i64 [[TMP25]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP26]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP29:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP30:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP29]], [[TMP30]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
%1 = load i64, ptr %arrayidx1, align 8
%cmp2 = icmp sgt i64 %0, %1
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_fcmp(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = fcmp ogt float [[TMP9]], [[TMP10]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_fcmp(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP9]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD7]]
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD9]]
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP15]], <4 x i64> [[TMP16]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP17]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = fcmp ogt float [[TMP21]], [[TMP22]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_fcmp(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP12]], align 4
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP13]], align 4
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = load float, ptr [[TMP14]], align 4
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = load float, ptr [[TMP15]], align 4
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = fcmp ogt float [[TMP8]], [[TMP16]]
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = fcmp ogt float [[TMP9]], [[TMP17]]
; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = fcmp ogt float [[TMP10]], [[TMP18]]
; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = fcmp ogt float [[TMP11]], [[TMP19]]
; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i64 [[INDEX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP25]] = select i1 [[TMP21]], i64 [[TMP1]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP26]] = select i1 [[TMP22]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP24]], i64 [[TMP25]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP26]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP30:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = fcmp ogt float [[TMP29]], [[TMP30]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%arrayidx1 = getelementptr inbounds float, ptr %b, i64 %iv
%1 = load float, ptr %arrayidx1, align 4
%cmp2 = fcmp ogt float %0, %1
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp_min_valid_iv_start(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: [[IND_END:%.*]] = add i64 -9223372036854775807, [[N_VEC]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -9223372036854775807, i64 -9223372036854775806, i64 -9223372036854775805, i64 -9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP9]], [[TMP10]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF4IC1-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp_min_valid_iv_start(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: [[IND_END:%.*]] = add i64 -9223372036854775807, [[N_VEC]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -9223372036854775807, i64 -9223372036854775806, i64 -9223372036854775805, i64 -9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD8]]
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], [[WIDE_LOAD9]]
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], [[WIDE_LOAD10]]
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD7]], [[WIDE_LOAD11]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP14]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI4]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP15]], <4 x i64> [[TMP16]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP17]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX13:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX12]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX13]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP20]], -9223372036854775808
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP21]], [[TMP22]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF4IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp_min_valid_iv_start(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: [[IND_END:%.*]] = add i64 -9223372036854775807, [[N_VEC]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 -9223372036854775807, [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP5]]
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP6]]
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP7]]
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP16]], align 8
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP17]], align 8
; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP18]], align 8
; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP19]], align 8
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP12]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp sgt i64 [[TMP13]], [[TMP21]]
; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = icmp sgt i64 [[TMP14]], [[TMP22]]
; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = icmp sgt i64 [[TMP15]], [[TMP23]]
; CHECK-VF1IC4-NEXT: [[TMP28]] = select i1 [[TMP24]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP29]] = select i1 [[TMP25]], i64 [[TMP1]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP30]] = select i1 [[TMP26]], i64 [[TMP2]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[TMP31]] = select i1 [[TMP27]], i64 [[TMP3]], i64 [[VEC_PHI4]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP28]], i64 [[TMP29]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP30]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX5]], i64 [[TMP31]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX6]], -9223372036854775808
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX6]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775807, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP33:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP34:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP33]], [[TMP34]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF1IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv.j = phi i64 [ %inc3, %for.body ], [ -9223372036854775807, %entry]
%iv.i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv.i
%0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv.i
%1 = load i64, ptr %arrayidx1, align 8
%cmp2 = icmp sgt i64 %0, %1
%cond = select i1 %cmp2, i64 %iv.j, i64 %rdx
%inc = add nuw nsw i64 %iv.i, 1
%inc3 = add nsw i64 %iv.j, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp_unsigned_iv_range(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*:]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -9223372036854775808, i64 -9223372036854775807, i64 -9223372036854775806, i64 -9223372036854775805>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775804
; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP7]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ -4, %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[IV_I:%.*]] = phi i64 [ 9223372036854775804, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP9]], [[TMP10]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF4IC1-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp_unsigned_iv_range(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*:]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -9223372036854775808, i64 -9223372036854775807, i64 -9223372036854775806, i64 -9223372036854775805>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD7]]
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], [[WIDE_LOAD9]]
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], [[WIDE_LOAD10]]
; CHECK-VF4IC4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775792
; CHECK-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[TMP14]], <4 x i64> [[TMP15]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP16]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i64> @llvm.umax.v4i64(<4 x i64> [[RDX_MINMAX11]], <4 x i64> [[TMP17]])
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_MINMAX12]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP19]], 0
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP19]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ -16, %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ 9223372036854775792, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP21]], [[TMP22]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF4IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp_unsigned_iv_range(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*:]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 -9223372036854775808, [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[IV_I]], 1
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i64 [[IV_I]], 2
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[IV_I]], 3
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; CHECK-VF1IC4-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP5]]
; CHECK-VF1IC4-NEXT: [[TMP37:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP16]], align 8
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP17]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]]
; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp sgt i64 [[TMP11]], [[TMP19]]
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP12]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp sgt i64 [[TMP13]], [[TMP21]]
; CHECK-VF1IC4-NEXT: [[TMP26]] = select i1 [[CMP2]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP0]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP28]] = select i1 [[TMP24]], i64 [[TMP1]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP29]] = select i1 [[TMP25]], i64 [[TMP2]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV_I]], 4
; CHECK-VF1IC4-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9223372036854775804
; CHECK-VF1IC4-NEXT: br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP26]], i64 [[TMP27]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umax.i64(i64 [[RDX_MINMAX]], i64 [[TMP28]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umax.i64(i64 [[RDX_MINMAX4]], i64 [[TMP29]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 0
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: br label %[[SCALAR_PH:.*]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = phi i64 [ -4, %[[SCALAR_PH]] ], [ [[INC3:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[IV_I1:%.*]] = phi i64 [ 9223372036854775804, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I1]]
; CHECK-VF1IC4-NEXT: [[TMP31:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I1]]
; CHECK-VF1IC4-NEXT: [[TMP32:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-VF1IC4-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP31]], [[TMP32]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP3]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I1]], 1
; CHECK-VF1IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 9223372036854775806
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv.j = phi i64 [ 9223372036854775808, %entry], [ %inc3, %for.body ]
%iv.i = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %for.body ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv.i
%0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv.i
%1 = load i64, ptr %arrayidx1, align 8
%cmp2 = icmp sgt i64 %0, %1
%cond = select i1 %cmp2, i64 %iv.j, i64 %rdx
%inc = add nuw nsw i64 %iv.i, 1
%inc3 = add nsw i64 %iv.j, 1
%exitcond.not = icmp eq i64 %inc, 9223372036854775806
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
; Negative tests
define float @not_vectorized_select_float_induction_icmp(ptr %a, ptr %b, float %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define float @not_vectorized_select_float_induction_icmp(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], float [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fmul float 1.000000e+00, [[DOTCAST]]
; CHECK-VF4IC1-NEXT: [[TMP13:%.*]] = fadd float 0.000000e+00, [[TMP3]]
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[RDX_START]], i64 0
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[TMP8]] = select i1 [[TMP7]], <4 x i1> [[TMP5]], <4 x i1> [[TMP2]]
; CHECK-VF4IC1-NEXT: [[TMP9]] = select i1 [[TMP7]], <4 x float> [[VEC_IND]], <4 x float> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = fadd <4 x float> [[VEC_IND]], splat (float 4.000000e+00)
; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i64 0
; CHECK-VF4IC1-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> [[TMP9]], <4 x i1> [[TMP8]], float [[TMP11]])
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY1:.*]]
; CHECK-VF4IC1: [[FOR_BODY1]]:
; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[FIV:%.*]] = phi float [ [[CONV3:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi float [ [[COND:%.*]], %[[FOR_BODY1]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]]
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV1]]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], float [[FIV]], float [[RDX]]
; CHECK-VF4IC1-NEXT: [[CONV3]] = fadd float [[FIV]], 1.000000e+00
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV1]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi float [ [[COND]], %[[FOR_BODY1]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret float [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define float @not_vectorized_select_float_induction_icmp(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], float [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = fmul float 1.000000e+00, [[DOTCAST]]
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = fadd float 0.000000e+00, [[TMP3]]
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[RDX_START]], i64 0
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[FOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = fadd <4 x float> [[VEC_IND]], splat (float 4.000000e+00)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = fadd <4 x float> [[STEP_ADD]], splat (float 4.000000e+00)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = fadd <4 x float> [[STEP_ADD_2]], splat (float 4.000000e+00)
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[ARRAYIDX]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i64, ptr [[ARRAYIDX]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[ARRAYIDX]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP26]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP30]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP34]], align 8
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP40:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP35]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP36]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP40]], align 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], [[WIDE_LOAD9]]
; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], [[WIDE_LOAD10]]
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = freeze <4 x i1> [[TMP15]]
; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = or <4 x i1> [[TMP6]], [[TMP19]]
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = freeze <4 x i1> [[TMP16]]
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = or <4 x i1> [[TMP20]], [[TMP21]]
; CHECK-VF4IC4-NEXT: [[TMP23:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = or <4 x i1> [[TMP22]], [[TMP23]]
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP24]])
; CHECK-VF4IC4-NEXT: [[TMP8]] = select i1 [[TMP7]], <4 x i1> [[TMP5]], <4 x i1> [[TMP2]]
; CHECK-VF4IC4-NEXT: [[TMP27]] = select i1 [[TMP7]], <4 x i1> [[TMP15]], <4 x i1> [[TMP14]]
; CHECK-VF4IC4-NEXT: [[TMP28]] = select i1 [[TMP7]], <4 x i1> [[TMP16]], <4 x i1> [[TMP18]]
; CHECK-VF4IC4-NEXT: [[TMP29]] = select i1 [[TMP7]], <4 x i1> [[TMP17]], <4 x i1> [[TMP25]]
; CHECK-VF4IC4-NEXT: [[TMP9]] = select i1 [[TMP7]], <4 x float> [[VEC_IND]], <4 x float> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP31]] = select i1 [[TMP7]], <4 x float> [[STEP_ADD]], <4 x float> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP32]] = select i1 [[TMP7]], <4 x float> [[STEP_ADD_2]], <4 x float> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP33]] = select i1 [[TMP7]], <4 x float> [[STEP_ADD_3]], <4 x float> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = fadd <4 x float> [[STEP_ADD_3]], splat (float 4.000000e+00)
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i64 0
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> [[TMP9]], <4 x i1> [[TMP8]], float [[TMP11]])
; CHECK-VF4IC4-NEXT: [[TMP37:%.*]] = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> [[TMP31]], <4 x i1> [[TMP27]], float [[TMP12]])
; CHECK-VF4IC4-NEXT: [[TMP38:%.*]] = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> [[TMP32]], <4 x i1> [[TMP28]], float [[TMP37]])
; CHECK-VF4IC4-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> [[TMP33]], <4 x i1> [[TMP29]], float [[TMP38]])
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP39]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY1:.*]]
; CHECK-VF4IC4: [[FOR_BODY1]]:
; CHECK-VF4IC4-NEXT: [[IV1:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[FIV:%.*]] = phi float [ [[CONV3:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL11]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi float [ [[COND:%.*]], %[[FOR_BODY1]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]]
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV1]]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], float [[FIV]], float [[RDX]]
; CHECK-VF4IC4-NEXT: [[CONV3]] = fadd float [[FIV]], 1.000000e+00
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV1]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi float [ [[COND]], %[[FOR_BODY1]] ], [ [[TMP39]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret float [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define float @not_vectorized_select_float_induction_icmp(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], float [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[FIV:%.*]] = phi float [ [[CONV3:%.*]], %[[FOR_BODY]] ], [ 0.000000e+00, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi float [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], float [[FIV]], float [[RDX]]
; CHECK-VF1IC4-NEXT: [[CONV3]] = fadd float [[FIV]], 1.000000e+00
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi float [ [[COND]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: ret float [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%fiv = phi float [ %conv3, %for.body ], [ 0.000000e+00, %entry ]
%rdx = phi float [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
%1 = load i64, ptr %arrayidx1, align 8
%cmp2 = icmp sgt i64 %0, %1
%cond = select i1 %cmp2, float %fiv, float %rdx
%conv3 = fadd float %fiv, 1.000000e+00
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret float %cond
}
; The sentinel value for increasing-IV vectorization is -LONG_MAX, and since
; the IV hits this value, it is vectorized as a generic last-active reduction.
define i64 @not_vectorized_select_icmp_iv_out_of_bound(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @not_vectorized_select_icmp_iv_out_of_bound(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: [[TMP11:%.*]] = add i64 -9223372036854775808, [[N_VEC]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -9223372036854775808, i64 -9223372036854775807, i64 -9223372036854775806, i64 -9223372036854775805>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC1-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI1]], [[TMP3]]
; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = freeze i1 [[TMP9]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP7]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775808, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF4IC1-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @not_vectorized_select_icmp_iv_out_of_bound(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = add i64 -9223372036854775808, [[N_VEC]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 -9223372036854775808, i64 -9223372036854775807, i64 -9223372036854775806, i64 -9223372036854775805>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nsw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i64>, ptr [[TMP21]], align 8
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD8]], [[WIDE_LOAD12]]
; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD9]], [[WIDE_LOAD13]]
; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD10]], [[WIDE_LOAD14]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = or <4 x i1> [[VEC_PHI4]], [[TMP3]]
; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI5]], [[TMP10]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI6]], [[TMP24]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI7]], [[TMP25]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND1]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP19]] = select <4 x i1> [[TMP24]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP25]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP17]], <4 x i64> [[TMP18]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX16:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP19]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX16]], <4 x i64> [[TMP20]])
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX18]])
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP14]], [[TMP13]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX18:%.*]] = or <4 x i1> [[TMP15]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX17:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX18]]
; CHECK-VF4IC4-NEXT: [[TMP23:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX17]])
; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = freeze i1 [[TMP23]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP26]], i64 [[TMP22]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775808, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV_J:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[IV_I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL19]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I]], 1
; CHECK-VF4IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @not_vectorized_select_icmp_iv_out_of_bound(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i64 -9223372036854775808, [[N_VEC]]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV_I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[IV_J:%.*]] = add i64 -9223372036854775808, [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = add i64 [[IV_J]], 1
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[IV_J]], 2
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[IV_J]], 3
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i64 [[IV_I]], 1
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[IV_I]], 2
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i64 [[IV_I]], 3
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I]]
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP5]]
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP6]]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP16]], align 8
; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP17]], align 8
; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP18]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP12]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp sgt i64 [[TMP13]], [[TMP21]]
; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = icmp sgt i64 [[TMP14]], [[TMP22]]
; CHECK-VF1IC4-NEXT: [[TMP27]] = or i1 [[VEC_PHI4]], [[CMP2]]
; CHECK-VF1IC4-NEXT: [[TMP28]] = or i1 [[VEC_PHI5]], [[TMP24]]
; CHECK-VF1IC4-NEXT: [[TMP29]] = or i1 [[VEC_PHI6]], [[TMP25]]
; CHECK-VF1IC4-NEXT: [[TMP30]] = or i1 [[VEC_PHI7]], [[TMP26]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV_J]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[TMP32]] = select i1 [[TMP24]], i64 [[TMP11]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP33]] = select i1 [[TMP25]], i64 [[TMP2]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP34]] = select i1 [[TMP26]], i64 [[TMP3]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV_I]], 4
; CHECK-VF1IC4-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 [[TMP32]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX9:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP33]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX11:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX9]], i64 [[TMP34]])
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP28]], [[TMP27]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX10:%.*]] = or i1 [[TMP29]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX11:%.*]] = or i1 [[TMP30]], [[BIN_RDX10]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = freeze i1 [[BIN_RDX11]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX11]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ -9223372036854775808, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL12:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY1:.*]]
; CHECK-VF1IC4: [[FOR_BODY1]]:
; CHECK-VF1IC4-NEXT: [[IV_J1:%.*]] = phi i64 [ [[INC3:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[IV_I1:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL12]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX1:%.*]] = phi i64 [ [[COND1:%.*]], %[[FOR_BODY1]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_I1]]
; CHECK-VF1IC4-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_I1]]
; CHECK-VF1IC4-NEXT: [[TMP37:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-VF1IC4-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]]
; CHECK-VF1IC4-NEXT: [[COND1]] = select i1 [[CMP3]], i64 [[IV_J1]], i64 [[RDX1]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV_I1]], 1
; CHECK-VF1IC4-NEXT: [[INC3]] = add nsw i64 [[IV_J1]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND1]], %[[FOR_BODY1]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv.j = phi i64 [ %inc3, %for.body ], [ -9223372036854775808, %entry]
%iv.i = phi i64 [ %inc, %for.body ], [ 0, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv.i
%0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv.i
%1 = load i64, ptr %arrayidx1, align 8
%cmp2 = icmp sgt i64 %0, %1
%cond = select i1 %cmp2, i64 %iv.j, i64 %rdx
%inc = add nuw nsw i64 %iv.i, 1
%inc3 = add nsw i64 %iv.j, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
define i64 @not_vectorized_select_icmp_non_const_iv_start_value(ptr %a, ptr %b, i64 %ivstart, i64 %rdx.start, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @not_vectorized_select_icmp_non_const_iv_start_value(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[IVSTART:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[IVSTART]]
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP12]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP12]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP12]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: [[TMP13:%.*]] = add i64 [[IVSTART]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[IVSTART]], i64 0
; CHECK-VF4IC1-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC1-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IVSTART]], [[INDEX]]
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-VF4IC1-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI1]], [[TMP4]]
; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]])
; CHECK-VF4IC1-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = freeze i1 [[TMP10]]
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[RDX_START]]
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP12]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ [[IVSTART]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF4IC4-LABEL: define i64 @not_vectorized_select_icmp_non_const_iv_start_value(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[IVSTART:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[IVSTART]]
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP12]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP12]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP12]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = add i64 [[IVSTART]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[IVSTART]], i64 0
; CHECK-VF4IC4-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-VF4IC4-NEXT: [[INDUCTION:%.*]] = add nuw nsw <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <4 x i64> [[VEC_IND1]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IVSTART]], [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i64>, ptr [[TMP22]], align 8
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD8]], [[WIDE_LOAD12]]
; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD9]], [[WIDE_LOAD13]]
; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD10]], [[WIDE_LOAD14]]
; CHECK-VF4IC4-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI4]], [[TMP4]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = or <4 x i1> [[VEC_PHI5]], [[TMP11]]
; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI6]], [[TMP25]]
; CHECK-VF4IC4-NEXT: [[TMP17]] = or <4 x i1> [[VEC_PHI7]], [[TMP26]]
; CHECK-VF4IC4-NEXT: [[TMP18]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND1]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP19]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP25]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP21]] = select <4 x i1> [[TMP26]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP18]], <4 x i64> [[TMP19]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX16:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP20]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[RDX_MINMAX16]], <4 x i64> [[TMP21]])
; CHECK-VF4IC4-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX18]])
; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP15]], [[TMP14]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX18:%.*]] = or <4 x i1> [[TMP16]], [[BIN_RDX]]
; CHECK-VF4IC4-NEXT: [[BIN_RDX17:%.*]] = or <4 x i1> [[TMP17]], [[BIN_RDX18]]
; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX17]])
; CHECK-VF4IC4-NEXT: [[TMP27:%.*]] = freeze i1 [[TMP24]]
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP27]], i64 [[TMP23]], i64 [[RDX_START]]
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP12]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ [[IVSTART]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[COND_LCSSA]]
;
; CHECK-VF1IC4-LABEL: define i64 @not_vectorized_select_icmp_non_const_iv_start_value(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[IVSTART:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[IVSTART]]
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP5]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP5]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = add i64 [[IVSTART]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[FOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = add i64 [[IVSTART]], [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[IV]], 2
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 3
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX1]], align 8
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP14]], align 8
; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP15]], align 8
; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP16]], align 8
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[TMP0]], [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = icmp sgt i64 [[TMP10]], [[TMP18]]
; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp sgt i64 [[TMP11]], [[TMP19]]
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP12]], [[TMP20]]
; CHECK-VF1IC4-NEXT: [[TMP25]] = or i1 [[VEC_PHI4]], [[CMP2]]
; CHECK-VF1IC4-NEXT: [[TMP26]] = or i1 [[VEC_PHI5]], [[TMP22]]
; CHECK-VF1IC4-NEXT: [[TMP27]] = or i1 [[VEC_PHI6]], [[TMP23]]
; CHECK-VF1IC4-NEXT: [[TMP28]] = or i1 [[VEC_PHI7]], [[TMP24]]
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[TMP30]] = select i1 [[TMP22]], i64 [[TMP2]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP31]] = select i1 [[TMP23]], i64 [[TMP3]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP32]] = select i1 [[TMP24]], i64 [[TMP4]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 [[TMP30]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX9:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX]], i64 [[TMP31]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX11:%.*]] = call i64 @llvm.smax.i64(i64 [[RDX_MINMAX9]], i64 [[TMP32]])
; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP26]], [[TMP25]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX10:%.*]] = or i1 [[TMP27]], [[BIN_RDX]]
; CHECK-VF1IC4-NEXT: [[BIN_RDX11:%.*]] = or i1 [[TMP28]], [[BIN_RDX10]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = freeze i1 [[BIN_RDX11]]
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX11]], i64 [[RDX_START]]
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[IVSTART]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY1:.*]]
; CHECK-VF1IC4: [[FOR_BODY1]]:
; CHECK-VF1IC4-NEXT: [[IV1:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY1]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX1:%.*]] = phi i64 [ [[COND1:%.*]], %[[FOR_BODY1]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]]
; CHECK-VF1IC4-NEXT: [[TMP34:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-VF1IC4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV1]]
; CHECK-VF1IC4-NEXT: [[TMP35:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-VF1IC4-NEXT: [[CMP3:%.*]] = icmp sgt i64 [[TMP34]], [[TMP35]]
; CHECK-VF1IC4-NEXT: [[COND1]] = select i1 [[CMP3]], i64 [[IV1]], i64 [[RDX1]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV1]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY1]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND1]], %[[FOR_BODY1]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[COND_LCSSA]]
;
entry:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ %ivstart, %entry ]
%rdx = phi i64 [ %cond, %for.body ], [ %rdx.start, %entry ]
%arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
%0 = load i64, ptr %arrayidx, align 8
%arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %iv
%1 = load i64, ptr %arrayidx1, align 8
%cmp2 = icmp sgt i64 %0, %1
%cond = select i1 %cmp2, i64 %iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit, label %for.body
exit:
ret i64 %cond
}
; Test that we can vectorize a loop with a FindLastIV reduction when the IV
; value is transformed via NOT (xor IV, -1). This requires computing the SCEV
; for the NOT operation to enable sentinel-based vectorization.
define i64 @select_icmp_xor_not_iv(ptr %a, i64 %n) {
; CHECK-VF4IC1-LABEL: define i64 @select_icmp_xor_not_iv(
; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC1-NEXT: [[CMP_GUARD:%.*]] = icmp sgt i64 [[N]], 0
; CHECK-VF4IC1-NEXT: br i1 [[CMP_GUARD]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]:
; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC1: [[VECTOR_PH]]:
; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC1: [[VECTOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = xor <4 x i64> [[VEC_IND]], splat (i64 -1)
; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP2]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP3]])
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP5]], 9223372036854775807
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP5]], i64 0
; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC1: [[SCALAR_PH]]:
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC1: [[FOR_BODY]]:
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-VF4IC1-NEXT: [[CMP2:%.*]] = icmp eq i8 [[TMP6]], 0
; CHECK-VF4IC1-NEXT: [[NOT_IV:%.*]] = xor i64 [[IV]], -1
; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[NOT_IV]], i64 [[RDX]]
; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]:
; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC1-NEXT: br label %[[EXIT]]
; CHECK-VF4IC1: [[EXIT]]:
; CHECK-VF4IC1-NEXT: [[RESULT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
; CHECK-VF4IC1-NEXT: ret i64 [[RESULT]]
;
; CHECK-VF4IC4-LABEL: define i64 @select_icmp_xor_not_iv(
; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF4IC4-NEXT: [[CMP_GUARD:%.*]] = icmp sgt i64 [[N]], 0
; CHECK-VF4IC4-NEXT: br i1 [[CMP_GUARD]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]:
; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF4IC4: [[VECTOR_PH]]:
; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF4IC4: [[VECTOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD4]], zeroinitializer
; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD5]], zeroinitializer
; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD6]], zeroinitializer
; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = xor <4 x i64> [[VEC_IND]], splat (i64 -1)
; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = xor <4 x i64> [[STEP_ADD]], splat (i64 -1)
; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = xor <4 x i64> [[STEP_ADD_2]], splat (i64 -1)
; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = xor <4 x i64> [[STEP_ADD_3]], splat (i64 -1)
; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP8]], <4 x i64> [[VEC_PHI]]
; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP9]], <4 x i64> [[VEC_PHI1]]
; CHECK-VF4IC4-NEXT: [[TMP14]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[VEC_PHI2]]
; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[VEC_PHI3]]
; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK-VF4IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP12]], <4 x i64> [[TMP13]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP14]])
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX7]], <4 x i64> [[TMP15]])
; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX8]])
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP17]], 9223372036854775807
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP17]], i64 0
; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF4IC4: [[SCALAR_PH]]:
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF4IC4: [[FOR_BODY]]:
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-VF4IC4-NEXT: [[CMP2:%.*]] = icmp eq i8 [[TMP18]], 0
; CHECK-VF4IC4-NEXT: [[NOT_IV:%.*]] = xor i64 [[IV]], -1
; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[NOT_IV]], i64 [[RDX]]
; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]:
; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF4IC4-NEXT: br label %[[EXIT]]
; CHECK-VF4IC4: [[EXIT]]:
; CHECK-VF4IC4-NEXT: [[RESULT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
; CHECK-VF4IC4-NEXT: ret i64 [[RESULT]]
;
; CHECK-VF1IC4-LABEL: define i64 @select_icmp_xor_not_iv(
; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]:
; CHECK-VF1IC4-NEXT: [[CMP_GUARD:%.*]] = icmp sgt i64 [[N]], 0
; CHECK-VF1IC4-NEXT: br i1 [[CMP_GUARD]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]:
; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK-VF1IC4: [[VECTOR_PH]]:
; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK-VF1IC4: [[VECTOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 3
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]]
; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP3]], align 1
; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP4]], align 1
; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP5]], align 1
; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP6]], align 1
; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP7]], 0
; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i8 [[TMP8]], 0
; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP9]], 0
; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i8 [[TMP10]], 0
; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = xor i64 [[INDEX]], -1
; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = xor i64 [[TMP0]], -1
; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = xor i64 [[TMP1]], -1
; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = xor i64 [[TMP2]], -1
; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP11]], i64 [[TMP15]], i64 [[VEC_PHI]]
; CHECK-VF1IC4-NEXT: [[TMP20]] = select i1 [[TMP12]], i64 [[TMP16]], i64 [[VEC_PHI1]]
; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP13]], i64 [[TMP17]], i64 [[VEC_PHI2]]
; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP14]], i64 [[TMP18]], i64 [[VEC_PHI3]]
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-VF1IC4: [[MIDDLE_BLOCK]]:
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP19]], i64 [[TMP20]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP21]])
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP22]])
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 0
; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; CHECK-VF1IC4: [[SCALAR_PH]]:
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-VF1IC4: [[FOR_BODY]]:
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-VF1IC4-NEXT: [[CMP2:%.*]] = icmp eq i8 [[TMP24]], 0
; CHECK-VF1IC4-NEXT: [[NOT_IV:%.*]] = xor i64 [[IV]], -1
; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP2]], i64 [[NOT_IV]], i64 [[RDX]]
; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]:
; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: br label %[[EXIT]]
; CHECK-VF1IC4: [[EXIT]]:
; CHECK-VF1IC4-NEXT: [[RESULT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
; CHECK-VF1IC4-NEXT: ret i64 [[RESULT]]
;
entry:
%cmp.guard = icmp sgt i64 %n, 0
br i1 %cmp.guard, label %for.body.preheader, label %exit
for.body.preheader:
br label %for.body
for.body:
%iv = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%rdx = phi i64 [ %cond, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %iv
%0 = load i8, ptr %arrayidx, align 1
%cmp2 = icmp eq i8 %0, 0
%not.iv = xor i64 %iv, -1
%cond = select i1 %cmp2, i64 %not.iv, i64 %rdx
%inc = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %inc, %n
br i1 %exitcond.not, label %exit.loopexit, label %for.body
exit.loopexit:
%cond.lcssa = phi i64 [ %cond, %for.body ]
br label %exit
exit:
%result = phi i64 [ 0, %entry ], [ %cond.lcssa, %exit.loopexit ]
ret i64 %result
}