The canonical form preferred by InstCombine uses 64-bit values for an index when it is a constant. The loop vectoriser should do the same where possible, as this reduces churn in the compiler. It also makes follow-up work easier, such as removing unnecessary extra passes from the RUN line of tests in the test directory, which I plan to do afterwards.
1393 lines
98 KiB
LLVM
1393 lines
98 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC1VF4 %s
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF4 %s
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF1 %s

; Input loop: %iv counts down from the constant 19999 and the exit test is made
; on %iv itself (not %iv.next), so the body also runs for %iv == 0, giving
; 20000 iterations. Each iteration loads %a[%iv] and, when the loaded value is
; signed-greater-than 3, replaces the running result with %iv. The function
; returns the last value selected, or the start value 331 if the compare never
; succeeded.
;
; The expected output below shows the vectorized form for each of the three
; opt invocations: the selected indices are combined with a signed-min
; reduction seeded with 9223372036854775807, and the middle block maps that
; seed back to 331.
define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
; IC1VF4-SAME: ptr [[A:%.*]]) {
; IC1VF4-NEXT: [[ENTRY:.*:]]
; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
; IC1VF4: [[VECTOR_PH]]:
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC1VF4: [[VECTOR_BODY]]:
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4)
; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IC1VF4: [[MIDDLE_BLOCK]]:
; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]])
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], 9223372036854775807
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
; IC1VF4-NEXT: br label %[[EXIT:.*]]
; IC1VF4: [[EXIT]]:
; IC1VF4-NEXT: ret i64 [[RDX_SELECT]]
;
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
; IC4VF4-SAME: ptr [[A:%.*]]) {
; IC4VF4-NEXT: [[ENTRY:.*:]]
; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
; IC4VF4: [[VECTOR_PH]]:
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF4: [[VECTOR_BODY]]:
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 4)
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = sub <4 x i64> [[STEP_ADD]], splat (i64 4)
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = sub <4 x i64> [[STEP_ADD_2]], splat (i64 4)
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -7
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -11
; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -15
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; IC4VF4-NEXT: [[TMP19]] = sub <4 x i64> [[STEP_ADD_3]], splat (i64 4)
; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IC4VF4: [[MIDDLE_BLOCK]]:
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX11]])
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], 9223372036854775807
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
; IC4VF4-NEXT: br label %[[EXIT:.*]]
; IC4VF4: [[EXIT]]:
; IC4VF4-NEXT: ret i64 [[RDX_SELECT]]
;
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
; IC4VF1-SAME: ptr [[A:%.*]]) {
; IC4VF1-NEXT: [[ENTRY:.*:]]
; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
; IC4VF1: [[VECTOR_PH]]:
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF1: [[VECTOR_BODY]]:
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IC4VF1: [[MIDDLE_BLOCK]]:
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP15]], i64 [[TMP16]])
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
; IC4VF1-NEXT: br label %[[EXIT:.*]]
; IC4VF1: [[EXIT]]:
; IC4VF1-NEXT: ret i64 [[RDX_SELECT]]
;
entry:
  br label %loop

loop:
  ; %iv is the decreasing induction variable; %rdx carries the running result
  ; and starts at 331.
  %iv = phi i64 [ 19999, %entry ], [ %iv.next, %loop ]
  %rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
  %gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
  %ld.a = load i64, ptr %gep.a.iv, align 8
  %cmp.a.3 = icmp sgt i64 %ld.a, 3
  ; Keep the current index when the element passes the compare, otherwise keep
  ; the previous result.
  %spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
  %iv.next = add nsw i64 %iv, -1
  ; The exit test uses %iv, so index 0 is processed before leaving the loop.
  %exit.cond = icmp eq i64 %iv, 0
  br i1 %exit.cond, label %exit, label %loop

exit:
  ret i64 %spec.select
}
|
|
|
|
; Read-only table of 13 i16 values, declared with 1-byte alignment. It is
; indexed by the induction variable of
; @select_decreasing_induction_icmp_table_i16.
@table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
|
|
|
|
; Input loop: %iv counts down from 12 over @table. Each iteration loads
; table[%iv], compares it unsigned-greater-than %val, and on success replaces
; the running result with %iv.next (that is, %iv - 1). The exit test is made on
; %iv.next == 0, so the body runs for %iv = 12 down to 1 (12 iterations) and
; table[0] is never loaded. The function returns the last value selected, or
; the start value 0 if the compare never succeeded.
;
; In the expected output below, the interleave=4/width=4 invocation covers 16
; lanes for a trip count of 12: the loads are emitted as per-lane predicated
; blocks, the last four of which are guarded by `br i1 false`, and the vector
; body branches straight to the middle block with no back-edge. The other two
; invocations keep a loop that steps by 4 until the index reaches 12.
define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
; IC1VF4-SAME: i16 noundef [[VAL:%.*]]) {
; IC1VF4-NEXT: [[ENTRY:.*:]]
; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
; IC1VF4: [[VECTOR_PH]]:
; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC1VF4: [[VECTOR_BODY]]:
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 -3
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]]
; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4)
; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IC1VF4: [[MIDDLE_BLOCK]]:
; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
; IC1VF4-NEXT: br label %[[EXIT:.*]]
; IC1VF4: [[EXIT]]:
; IC1VF4-NEXT: ret i16 [[RDX_SELECT]]
;
; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
; IC4VF4-SAME: i16 noundef [[VAL:%.*]]) {
; IC4VF4-NEXT: [[ENTRY:.*:]]
; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
; IC4VF4: [[VECTOR_PH]]:
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF4: [[VECTOR_BODY]]:
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; IC4VF4: [[PRED_LOAD_IF]]:
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12
; IC4VF4-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
; IC4VF4-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i64 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; IC4VF4: [[PRED_LOAD_CONTINUE]]:
; IC4VF4-NEXT: [[TMP3:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP2]], %[[PRED_LOAD_IF]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
; IC4VF4: [[PRED_LOAD_IF15]]:
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11
; IC4VF4-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1
; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[TMP13]], i64 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ [[TMP3]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP8]], %[[PRED_LOAD_IF15]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
; IC4VF4: [[PRED_LOAD_IF17]]:
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10
; IC4VF4-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1
; IC4VF4-NEXT: [[TMP10:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP19]], i64 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
; IC4VF4-NEXT: [[TMP11:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP10]], %[[PRED_LOAD_IF17]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
; IC4VF4: [[PRED_LOAD_IF19]]:
; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9
; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP11]], i16 [[TMP25]], i64 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP11]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP14]], %[[PRED_LOAD_IF19]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
; IC4VF4: [[PRED_LOAD_IF21]]:
; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8
; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i64 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP20]], %[[PRED_LOAD_IF21]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
; IC4VF4: [[PRED_LOAD_IF23]]:
; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7
; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
; IC4VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP37]], i64 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
; IC4VF4-NEXT: [[TMP23:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP22]], %[[PRED_LOAD_IF23]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
; IC4VF4: [[PRED_LOAD_IF25]]:
; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6
; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP23]], i16 [[TMP43]], i64 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
; IC4VF4-NEXT: [[TMP28:%.*]] = phi <4 x i16> [ [[TMP23]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP26]], %[[PRED_LOAD_IF25]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
; IC4VF4: [[PRED_LOAD_IF27]]:
; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5
; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP49]], i64 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP28]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP32]], %[[PRED_LOAD_IF27]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
; IC4VF4: [[PRED_LOAD_IF29]]:
; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4
; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
; IC4VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i64 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
; IC4VF4-NEXT: [[TMP35:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP34]], %[[PRED_LOAD_IF29]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
; IC4VF4: [[PRED_LOAD_IF31]]:
; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3
; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP61]], i64 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP35]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP38]], %[[PRED_LOAD_IF31]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
; IC4VF4: [[PRED_LOAD_IF33]]:
; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2
; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP67]], i64 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP44]], %[[PRED_LOAD_IF33]] ]
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
; IC4VF4: [[PRED_LOAD_IF35]]:
; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1
; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
; IC4VF4-NEXT: [[TMP46:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP73]], i64 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP46]], %[[PRED_LOAD_IF35]] ]
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
; IC4VF4: [[PRED_LOAD_IF37]]:
; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0
; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i64 0
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
; IC4VF4-NEXT: [[TMP52:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP50]], %[[PRED_LOAD_IF37]] ]
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
; IC4VF4: [[PRED_LOAD_IF39]]:
; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1
; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> [[TMP52]], i16 [[TMP85]], i64 1
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ [[TMP52]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP56]], %[[PRED_LOAD_IF39]] ]
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
; IC4VF4: [[PRED_LOAD_IF41]]:
; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2
; IC4VF4-NEXT: [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1
; IC4VF4-NEXT: [[TMP58:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP91]], i64 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
; IC4VF4-NEXT: [[TMP59:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP58]], %[[PRED_LOAD_IF41]] ]
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
; IC4VF4: [[PRED_LOAD_IF43]]:
; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3
; IC4VF4-NEXT: [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1
; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP59]], i16 [[TMP97]], i64 3
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
; IC4VF4: [[PRED_LOAD_CONTINUE44]]:
; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x i16> [ [[TMP59]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP62]], %[[PRED_LOAD_IF43]] ]
; IC4VF4-NEXT: [[TMP100:%.*]] = icmp ugt <4 x i16> [[TMP27]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]]
; IC4VF4-NEXT: [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> <i16 11, i16 10, i16 9, i16 8>, <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> <i16 7, i16 6, i16 5, i16 4>, <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> <i16 3, i16 2, i16 1, i16 0>, <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> <i16 -1, i16 -2, i16 -3, i16 -4>, <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP82:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP83:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP88:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: [[TMP89:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767)
; IC4VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; IC4VF4: [[MIDDLE_BLOCK]]:
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP82]], <4 x i16> [[TMP83]])
; IC4VF4-NEXT: [[RDX_MINMAX31:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP88]])
; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX31]], <4 x i16> [[TMP89]])
; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
; IC4VF4-NEXT: br label %[[EXIT:.*]]
; IC4VF4: [[EXIT]]:
; IC4VF4-NEXT: ret i16 [[RDX_SELECT]]
;
; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) {
; IC4VF1-NEXT: [[ENTRY:.*:]]
; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
; IC4VF1: [[VECTOR_PH]]:
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF1: [[VECTOR_BODY]]:
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
; IC4VF1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP3]], align 1
; IC4VF1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 1
; IC4VF1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP5]], align 1
; IC4VF1-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
; IC4VF1-NEXT: [[TMP11:%.*]] = icmp ugt i16 [[TMP7]], [[VAL]]
; IC4VF1-NEXT: [[TMP12:%.*]] = icmp ugt i16 [[TMP8]], [[VAL]]
; IC4VF1-NEXT: [[TMP13:%.*]] = icmp ugt i16 [[TMP9]], [[VAL]]
; IC4VF1-NEXT: [[TMP14:%.*]] = icmp ugt i16 [[TMP10]], [[VAL]]
; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IC4VF1: [[MIDDLE_BLOCK]]:
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
; IC4VF1-NEXT: br label %[[EXIT:.*]]
; IC4VF1: [[EXIT]]:
; IC4VF1-NEXT: ret i16 [[RDX_SELECT]]
;
entry:
  br label %loop

loop:
  ; %iv is the decreasing induction variable; %rdx carries the running result
  ; and starts at 0.
  %iv = phi i16 [ 12, %entry ], [ %iv.next, %loop ]
  %rdx = phi i16 [ 0, %entry ], [ %spec.select, %loop ]
  %gep.table.iv = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %iv
  %ld.table = load i16, ptr %gep.table.iv, align 1
  %cmp.table.val = icmp ugt i16 %ld.table, %val
  %iv.next = add nsw i16 %iv, -1
  ; The decremented index, not %iv, is what gets recorded on a successful
  ; compare.
  %spec.select = select i1 %cmp.table.val, i16 %iv.next, i16 %rdx
  ; The exit test uses %iv.next, so the loop leaves before index 0 is loaded.
  %exit.cond = icmp eq i16 %iv.next, 0
  br i1 %exit.cond, label %exit, label %loop

exit:
  %spec.select.lcssa = phi i16 [ %spec.select, %loop ]
  ret i16 %spec.select.lcssa
}
|
|
|
|
; Constant table of 13 half values, declared with align 1.
; NOTE(review): the function that follows indexes @table, not @tablef -- confirm
; whether @tablef was meant to be used there.
@tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
|
|
|
|
; Scalar loop: i16 %iv runs from 12 down to 1; each iteration loads a half via
; a GEP on @table, compares it with %val (fcmp ugt) and, when true, records
; %iv.next (= %iv - 1) in the reduction, which starts at 0.
; NOTE(review): the GEP below uses [13 x i16] on @table although the loaded
; type is half, and @tablef is not referenced in this function -- confirm
; that this is intentional.
define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
|
|
; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
|
|
; IC1VF4-SAME: half noundef [[VAL:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]]
|
|
; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
|
|
; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4)
|
|
; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
|
|
; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
|
|
; IC1VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: ret i16 [[RDX_SELECT]]
|
|
;
|
|
; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
|
|
; IC4VF4-SAME: half noundef [[VAL:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF]]:
|
|
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 12
|
|
; IC4VF4-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i64 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE]]:
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP2]], %[[PRED_LOAD_IF]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF15]]:
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 11
|
|
; IC4VF4-NEXT: [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1
|
|
; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x half> [[TMP3]], half [[TMP13]], i64 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x half> [ [[TMP3]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP8]], %[[PRED_LOAD_IF15]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF17]]:
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 10
|
|
; IC4VF4-NEXT: [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP19]], i64 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP10]], %[[PRED_LOAD_IF17]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF19]]:
|
|
; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 9
|
|
; IC4VF4-NEXT: [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1
|
|
; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x half> [[TMP11]], half [[TMP25]], i64 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
|
|
; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x half> [ [[TMP11]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP14]], %[[PRED_LOAD_IF19]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF21]]:
|
|
; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 8
|
|
; IC4VF4-NEXT: [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1
|
|
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i64 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
|
|
; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP20]], %[[PRED_LOAD_IF21]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF23]]:
|
|
; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 7
|
|
; IC4VF4-NEXT: [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1
|
|
; IC4VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP37]], i64 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
|
|
; IC4VF4-NEXT: [[TMP23:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP22]], %[[PRED_LOAD_IF23]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF25]]:
|
|
; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 6
|
|
; IC4VF4-NEXT: [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1
|
|
; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x half> [[TMP23]], half [[TMP43]], i64 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
|
|
; IC4VF4-NEXT: [[TMP28:%.*]] = phi <4 x half> [ [[TMP23]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP26]], %[[PRED_LOAD_IF25]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF27]]:
|
|
; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 5
|
|
; IC4VF4-NEXT: [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1
|
|
; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x half> [[TMP28]], half [[TMP49]], i64 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
|
|
; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x half> [ [[TMP28]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP32]], %[[PRED_LOAD_IF27]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF29]]:
|
|
; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 4
|
|
; IC4VF4-NEXT: [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1
|
|
; IC4VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i64 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
|
|
; IC4VF4-NEXT: [[TMP35:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP34]], %[[PRED_LOAD_IF29]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF31]]:
|
|
; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 3
|
|
; IC4VF4-NEXT: [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1
|
|
; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x half> [[TMP35]], half [[TMP61]], i64 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
|
|
; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x half> [ [[TMP35]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP38]], %[[PRED_LOAD_IF31]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF33]]:
|
|
; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 2
|
|
; IC4VF4-NEXT: [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1
|
|
; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP67]], i64 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
|
|
; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP44]], %[[PRED_LOAD_IF33]] ]
|
|
; IC4VF4-NEXT: br i1 true, label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF35]]:
|
|
; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 1
|
|
; IC4VF4-NEXT: [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1
|
|
; IC4VF4-NEXT: [[TMP46:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP73]], i64 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
|
|
; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP46]], %[[PRED_LOAD_IF35]] ]
|
|
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF37]]:
|
|
; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 0
|
|
; IC4VF4-NEXT: [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1
|
|
; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i64 0
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
|
|
; IC4VF4-NEXT: [[TMP52:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP50]], %[[PRED_LOAD_IF37]] ]
|
|
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF39]]:
|
|
; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -1
|
|
; IC4VF4-NEXT: [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1
|
|
; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x half> [[TMP52]], half [[TMP85]], i64 1
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
|
|
; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x half> [ [[TMP52]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP56]], %[[PRED_LOAD_IF39]] ]
|
|
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF41]]:
|
|
; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -2
|
|
; IC4VF4-NEXT: [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1
|
|
; IC4VF4-NEXT: [[TMP58:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP91]], i64 2
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
|
|
; IC4VF4-NEXT: [[TMP59:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP58]], %[[PRED_LOAD_IF41]] ]
|
|
; IC4VF4-NEXT: br i1 false, label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44:.*]]
|
|
; IC4VF4: [[PRED_LOAD_IF43]]:
|
|
; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 -3
|
|
; IC4VF4-NEXT: [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1
|
|
; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x half> [[TMP59]], half [[TMP97]], i64 3
|
|
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
|
|
; IC4VF4: [[PRED_LOAD_CONTINUE44]]:
|
|
; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x half> [ [[TMP59]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP62]], %[[PRED_LOAD_IF43]] ]
|
|
; IC4VF4-NEXT: [[TMP100:%.*]] = fcmp ugt <4 x half> [[TMP27]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]]
|
|
; IC4VF4-NEXT: [[TMP76:%.*]] = select <4 x i1> [[TMP100]], <4 x i16> <i16 11, i16 10, i16 9, i16 8>, <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP77:%.*]] = select <4 x i1> [[TMP101]], <4 x i16> <i16 7, i16 6, i16 5, i16 4>, <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP70:%.*]] = select <4 x i1> [[TMP102]], <4 x i16> <i16 3, i16 2, i16 1, i16 0>, <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP71:%.*]] = select <4 x i1> [[TMP103]], <4 x i16> <i16 -1, i16 -2, i16 -3, i16 -4>, <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP82:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP76]], <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP83:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP77]], <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP88:%.*]] = select <4 x i1> splat (i1 true), <4 x i16> [[TMP70]], <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: [[TMP89:%.*]] = select <4 x i1> zeroinitializer, <4 x i16> [[TMP71]], <4 x i16> splat (i16 32767)
|
|
; IC4VF4-NEXT: br label %[[MIDDLE_BLOCK:.*]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP82]], <4 x i16> [[TMP83]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX31:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP88]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX31]], <4 x i16> [[TMP89]])
|
|
; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
|
|
; IC4VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: ret i16 [[RDX_SELECT]]
|
|
;
|
|
; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
|
|
; IC4VF1-SAME: half noundef [[VAL:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP3]], align 1
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = load half, ptr [[TMP4]], align 1
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = load half, ptr [[TMP5]], align 1
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = load half, ptr [[TMP6]], align 1
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = fcmp ugt half [[TMP7]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = fcmp ugt half [[TMP8]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = fcmp ugt half [[TMP9]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = fcmp ugt half [[TMP10]], [[VAL]]
|
|
; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
|
|
; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
|
|
; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
|
|
; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
|
|
; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
|
|
; IC4VF1-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: ret i16 [[RDX_SELECT]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; %iv counts down from 12; %rdx carries the selected value and starts at 0.
%iv = phi i16 [ 12, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i16 [ 0, %entry ], [ %spec.select, %loop ]
|
|
; NOTE(review): i16-typed GEP on @table feeding a half load -- see the note
; above the define line.
%gep.table.iv = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %iv
|
|
%ld.table = load half, ptr %gep.table.iv, align 1
|
|
%cmp.table.val = fcmp ugt half %ld.table, %val
|
|
; The value recorded by the select is the decremented IV, not %iv.
%iv.next = add nsw i16 %iv, -1
|
|
%spec.select = select i1 %cmp.table.val, i16 %iv.next, i16 %rdx
|
|
; Exit once the decremented IV is 0, so the element at index 0 is never loaded.
%exit.cond = icmp eq i16 %iv.next, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit:
|
|
%spec.select.lcssa = phi i16 [ %spec.select, %loop ]
|
|
ret i16 %spec.select.lcssa
|
|
}
|
|
|
|
; The signed sentinel value for decreasing-IV vectorization is LONG_MAX, and since
|
|
; the IV hits this value with smin vectorization, it needs to be vectorized with
|
|
; an unsigned sentinel and umin instead.
|
|
define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) {
|
|
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
|
|
; IC1VF4-SAME: ptr [[A:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 9223372036854775807, i64 9223372036854775806, i64 9223372036854775805, i64 9223372036854775804>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
|
|
; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
|
|
; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP4]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], -1
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
|
|
; IC1VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: ret i64 [[RDX_SELECT]]
|
|
;
|
|
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
|
|
; IC4VF4-SAME: ptr [[A:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 9223372036854775807, i64 9223372036854775806, i64 9223372036854775805, i64 9223372036854775804>, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = sub <4 x i64> [[STEP_ADD]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = sub <4 x i64> [[STEP_ADD_2]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -3
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -7
|
|
; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -11
|
|
; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -15
|
|
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
|
|
; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[TMP19]] = sub <4 x i64> [[STEP_ADD_3]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
|
|
; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX11]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], -1
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
|
|
; IC4VF4-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: ret i64 [[RDX_SELECT]]
|
|
;
|
|
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_unsigned(
|
|
; IC4VF1-SAME: ptr [[A:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
|
|
; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], -9223372036854775808
|
|
; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP15]], i64 [[TMP16]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
|
|
; IC4VF1-NEXT: br label %[[EXIT:.*]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: ret i64 [[RDX_SELECT]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; %iv starts at 9223372036854775807 (the largest signed i64) and is decremented
; by one each iteration; the reduction %rdx starts at 331.
%iv = phi i64 [ 9223372036854775807, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i64 [ 331, %entry ], [ %spec.select, %loop ]
|
|
%gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv
|
|
%ld.a = load i64, ptr %gep.a.iv, align 8
|
|
%cmp.a.3 = icmp sgt i64 %ld.a, 3
|
|
; The select records %iv itself (not %iv.next), so the start value
; 9223372036854775807 can be written into the reduction.
%spec.select = select i1 %cmp.a.3, i64 %iv, i64 %rdx
|
|
%iv.next = add nsw i64 %iv, -1
|
|
; The exit test is on %iv, so the iteration with %iv == 0 still executes.
%exit.cond = icmp eq i64 %iv, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit:
|
|
ret i64 %spec.select
|
|
}
|
|
|
|
; The unsigned sentinel value for decreasing-IV vectorization is ULONG_MAX,
|
|
; and since the IV hits this value, it is impossible to vectorize this case.
|
|
; In this test, %iv's range will include both signed and unsigned
|
|
; maximum (sentinel) values.
|
|
define i64 @select_decreasing_induction_icmp_iv_out_of_bound(ptr %a, ptr %b, i64 %rdx.start) {
|
|
; CHECK-LABEL: define i64 @select_decreasing_induction_icmp_iv_out_of_bound(
|
|
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
|
|
; CHECK-NEXT: [[ENTRY:.*]]:
|
|
; CHECK-NEXT: br label %[[LOOP:.*]]
|
|
; CHECK: [[LOOP]]:
|
|
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[LOOP]] ]
|
|
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
|
|
; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
|
|
; CHECK-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
|
|
; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
|
|
; CHECK-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
|
|
; CHECK-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
|
|
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
|
|
; CHECK: [[EXIT]]:
|
|
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
|
|
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %loop ]
|
|
; %iv starts at 0 and this add carries no nsw/nuw flag, so the first
; %iv.next is -1 (all bits set).
%iv.next = add i64 %iv, -1
|
|
%gep.a.iv = getelementptr inbounds i8, ptr %a, i64 %iv.next
|
|
%ld.a = load i8, ptr %gep.a.iv, align 1
|
|
%gep.b.iv = getelementptr inbounds i8, ptr %b, i64 %iv.next
|
|
%ld.b = load i8, ptr %gep.b.iv, align 1
|
|
%cmp.a.b = icmp sgt i8 %ld.a, %ld.b
|
|
; The reduction records %iv.next whenever a[%iv.next] > b[%iv.next] (signed i8).
%cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx
|
|
; The loop ends when the decremented IV reaches 0.
%exit.cond = icmp eq i64 %iv.next, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit:
|
|
ret i64 %cond
|
|
}
|
|
|
|
; ULONG_MAX (-1) is the sentinel value; %iv starts at -1, so the selected %iv.next starts at -2, which is just within the bounds for vectorization.
|
|
define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(ptr %a, ptr %b, i64 %rdx.start) {
|
|
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
|
|
; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC1VF4-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -1, i64 -2, i64 -3, i64 -4>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
|
|
; IC1VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i64 0
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
|
|
; IC1VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
|
|
; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
|
|
; IC1VF4-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD1]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE2]]
|
|
; IC1VF4-NEXT: [[TMP9]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP0]], <4 x i64> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC1VF4-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], -4
|
|
; IC1VF4-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP9]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP11]], -1
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP11]], i64 [[RDX_START]]
|
|
; IC1VF4-NEXT: br label %[[SCALAR_PH:.*]]
|
|
; IC1VF4: [[SCALAR_PH]]:
|
|
; IC1VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC1VF4: [[LOOP]]:
|
|
; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
|
|
; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
|
|
; IC1VF4-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
|
|
; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
|
|
; IC1VF4-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
|
|
; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
|
|
; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
|
|
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
|
|
; IC1VF4-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
|
|
; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF4-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 -1, i64 -2, i64 -3, i64 -4>, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = sub <4 x i64> [[STEP_ADD]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = sub <4 x i64> [[STEP_ADD_2]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
|
|
; IC4VF4-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i64 0
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -1)
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -1)
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -1)
|
|
; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
|
|
; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -7
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -11
|
|
; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -15
|
|
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP7]], align 1
|
|
; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
|
|
; IC4VF4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
|
|
; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
|
|
; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD4]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP1]]
|
|
; IC4VF4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
|
|
; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -7
|
|
; IC4VF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -11
|
|
; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -15
|
|
; IC4VF4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i8>, ptr [[TMP16]], align 1
|
|
; IC4VF4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
|
|
; IC4VF4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1
|
|
; IC4VF4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP22]], align 1
|
|
; IC4VF4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD10]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD11]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD12]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP23:%.*]] = icmp sgt <4 x i8> [[REVERSE]], [[REVERSE14]]
|
|
; IC4VF4-NEXT: [[TMP24:%.*]] = icmp sgt <4 x i8> [[REVERSE7]], [[REVERSE15]]
|
|
; IC4VF4-NEXT: [[TMP25:%.*]] = icmp sgt <4 x i8> [[REVERSE8]], [[REVERSE16]]
|
|
; IC4VF4-NEXT: [[TMP26:%.*]] = icmp sgt <4 x i8> [[REVERSE9]], [[REVERSE17]]
|
|
; IC4VF4-NEXT: [[TMP27]] = select <4 x i1> [[TMP23]], <4 x i64> [[TMP0]], <4 x i64> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP28]] = select <4 x i1> [[TMP24]], <4 x i64> [[TMP2]], <4 x i64> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP29]] = select <4 x i1> [[TMP25]], <4 x i64> [[TMP3]], <4 x i64> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP30]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[TMP33]] = sub <4 x i64> [[STEP_ADD_3]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -16
|
|
; IC4VF4-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP27]], <4 x i64> [[TMP28]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX18:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP29]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX19:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[RDX_MINMAX18]], <4 x i64> [[TMP30]])
|
|
; IC4VF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX19]])
|
|
; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP32]], -1
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP32]], i64 [[RDX_START]]
|
|
; IC4VF4-NEXT: br label %[[SCALAR_PH:.*]]
|
|
; IC4VF4: [[SCALAR_PH]]:
|
|
; IC4VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF4: [[LOOP]]:
|
|
; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 15, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
|
|
; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
|
|
; IC4VF4-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
|
|
; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
|
|
; IC4VF4-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
|
|
; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
|
|
; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
|
|
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
|
|
; IC4VF4-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_iv_just_within_bounds(
|
|
; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*:]]
|
|
; IC4VF1-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF1: [[VECTOR_BODY]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -1, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 -1, [[INDEX]]
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -1
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], -1
|
|
; IC4VF1-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], -1
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP3]]
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP5]]
|
|
; IC4VF1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP7]], align 1
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
|
|
; IC4VF1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
|
|
; IC4VF1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP3]]
|
|
; IC4VF1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP4]]
|
|
; IC4VF1-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP5]]
|
|
; IC4VF1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP6]]
|
|
; IC4VF1-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP15]], align 1
|
|
; IC4VF1-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP16]], align 1
|
|
; IC4VF1-NEXT: [[TMP21:%.*]] = load i8, ptr [[TMP17]], align 1
|
|
; IC4VF1-NEXT: [[TMP22:%.*]] = load i8, ptr [[TMP18]], align 1
|
|
; IC4VF1-NEXT: [[TMP23:%.*]] = icmp sgt i8 [[TMP11]], [[TMP19]]
|
|
; IC4VF1-NEXT: [[TMP24:%.*]] = icmp sgt i8 [[TMP12]], [[TMP20]]
|
|
; IC4VF1-NEXT: [[TMP25:%.*]] = icmp sgt i8 [[TMP13]], [[TMP21]]
|
|
; IC4VF1-NEXT: [[TMP26:%.*]] = icmp sgt i8 [[TMP14]], [[TMP22]]
|
|
; IC4VF1-NEXT: [[TMP27]] = select i1 [[TMP23]], i64 [[TMP3]], i64 [[VEC_PHI]]
|
|
; IC4VF1-NEXT: [[TMP28]] = select i1 [[TMP24]], i64 [[TMP4]], i64 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP29]] = select i1 [[TMP25]], i64 [[TMP5]], i64 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP30]] = select i1 [[TMP26]], i64 [[TMP6]], i64 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], -4
|
|
; IC4VF1-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP27]], i64 [[TMP28]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX]], i64 [[TMP29]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.umin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP30]])
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], -1
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 [[RDX_START]]
|
|
; IC4VF1-NEXT: br label %[[SCALAR_PH:.*]]
|
|
; IC4VF1: [[SCALAR_PH]]:
|
|
; IC4VF1-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF1: [[LOOP]]:
|
|
; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 3, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
|
|
; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
|
|
; IC4VF1-NEXT: [[LD_A:%.*]] = load i8, ptr [[GEP_A_IV]], align 1
|
|
; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV_NEXT]]
|
|
; IC4VF1-NEXT: [[LD_B:%.*]] = load i8, ptr [[GEP_B_IV]], align 1
|
|
; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i8 [[LD_A]], [[LD_B]]
|
|
; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
|
|
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
; Input IR. Each run configuration has its own assertion group above; all three
; expect a vector (or interleaved) body followed by a scalar remainder loop.
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
; The induction variable starts at the all-ones value (-1) and is stepped by -1.
%iv = phi i64 [ -1, %entry ], [ %iv.next, %loop ]
|
|
%rdx = phi i64 [ %rdx.start, %entry ], [ %cond, %loop ]
|
|
; The first iteration computes -1 + -1 = -2, so -2 is the first value that can
; be selected below.
%iv.next = add i64 %iv, -1
|
|
; %iv.next (not %iv) is the index into both byte arrays.
%gep.a.iv = getelementptr inbounds i8, ptr %a, i64 %iv.next
|
|
%ld.a = load i8, ptr %gep.a.iv, align 1
|
|
%gep.b.iv = getelementptr inbounds i8, ptr %b, i64 %iv.next
|
|
%ld.b = load i8, ptr %gep.b.iv, align 1
|
|
%cmp.a.b = icmp sgt i8 %ld.a, %ld.b
|
|
; Record %iv.next when the signed i8 compare holds; otherwise carry the
; previous %rdx forward.
%cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx
|
|
; Leave the loop once %iv.next is 0.
%exit.cond = icmp eq i64 %iv.next, 0
|
|
br i1 %exit.cond, label %exit, label %loop
|
|
|
|
exit:
|
|
; The last select result is the return value.
ret i64 %cond
|
|
}
|
|
|
|
define i64 @select_decreasing_induction_icmp_non_const_start(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
|
|
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_non_const_start(
|
|
; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
|
|
; IC1VF4-NEXT: [[ENTRY:.*]]:
|
|
; IC1VF4-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
|
|
; IC1VF4-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 1)
|
|
; IC1VF4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[UMIN]]
|
|
; IC1VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
|
|
; IC1VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC1VF4: [[VECTOR_PH]]:
|
|
; IC1VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
|
|
; IC1VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
|
|
; IC1VF4-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[N_VEC]]
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
|
|
; IC1VF4-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; IC1VF4-NEXT: [[TMP3:%.*]] = sub nsw <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
|
|
; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC1VF4: [[VECTOR_BODY]]:
|
|
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[TMP3]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
|
|
; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
|
; IC1VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
|
|
; IC1VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
|
|
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
|
; IC1VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 -3
|
|
; IC1VF4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
|
|
; IC1VF4-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD3]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC1VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE]], [[REVERSE4]]
|
|
; IC1VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP11]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC1VF4-NEXT: [[TMP12]] = or <4 x i1> [[VEC_PHI1]], [[TMP11]]
|
|
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i64> [[VEC_IND]], splat (i64 -4)
|
|
; IC1VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; IC1VF4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; IC1VF4: [[MIDDLE_BLOCK]]:
|
|
; IC1VF4-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP13]])
|
|
; IC1VF4-NEXT: [[TMP14:%.*]] = add nsw i64 [[TMP15]], -1
|
|
; IC1VF4-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
|
|
; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = freeze i1 [[TMP17]]
|
|
; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP14]], i64 [[RDX_START]]
|
|
; IC1VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
|
|
; IC1VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; IC1VF4: [[SCALAR_PH]]:
|
|
; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[ENTRY]] ]
|
|
; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
|
|
; IC1VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC1VF4: [[LOOP]]:
|
|
; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
|
; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
|
|
; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
|
|
; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
|
|
; IC1VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
|
|
; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
|
|
; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
|
|
; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; IC1VF4: [[EXIT]]:
|
|
; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC1VF4-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_non_const_start(
|
|
; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
|
|
; IC4VF4-NEXT: [[ENTRY:.*]]:
|
|
; IC4VF4-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
|
|
; IC4VF4-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 1)
|
|
; IC4VF4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[UMIN]]
|
|
; IC4VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16
|
|
; IC4VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF4: [[VECTOR_PH]]:
|
|
; IC4VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16
|
|
; IC4VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
|
|
; IC4VF4-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[N_VEC]]
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
|
|
; IC4VF4-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
|
|
; IC4VF4-NEXT: [[TMP3:%.*]] = sub nsw <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
|
|
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; IC4VF4: [[VECTOR_BODY]]:
|
|
; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ [[TMP3]], %[[VECTOR_PH]] ], [ [[TMP42:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP35:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP36:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP37:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[VECTOR_BODY]] ]
|
|
; IC4VF4-NEXT: [[STEP_ADD:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = sub <4 x i64> [[STEP_ADD]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = sub <4 x i64> [[STEP_ADD_2]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]]
|
|
; IC4VF4-NEXT: [[TMP4:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
|
|
; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
|
|
; IC4VF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3
|
|
; IC4VF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -7
|
|
; IC4VF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -11
|
|
; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -15
|
|
; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD9]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD10]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP4]]
|
|
; IC4VF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 -3
|
|
; IC4VF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 -7
|
|
; IC4VF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 -11
|
|
; IC4VF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i64 -15
|
|
; IC4VF4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i64>, ptr [[TMP14]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i64>, ptr [[TMP15]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x i64>, ptr [[TMP19]], align 8
|
|
; IC4VF4-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x i64>, ptr [[TMP22]], align 8
|
|
; IC4VF4-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD14]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD15]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD16]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD17]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
; IC4VF4-NEXT: [[TMP28:%.*]] = icmp sgt <4 x i64> [[REVERSE12]], [[REVERSE20]]
|
|
; IC4VF4-NEXT: [[TMP29:%.*]] = icmp sgt <4 x i64> [[REVERSE13]], [[REVERSE21]]
|
|
; IC4VF4-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i64> [[REVERSE14]], [[REVERSE22]]
|
|
; IC4VF4-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i64> [[REVERSE15]], [[REVERSE23]]
|
|
; IC4VF4-NEXT: [[TMP34]] = select <4 x i1> [[TMP28]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
|
|
; IC4VF4-NEXT: [[TMP35]] = select <4 x i1> [[TMP29]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
|
|
; IC4VF4-NEXT: [[TMP36]] = select <4 x i1> [[TMP20]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
|
|
; IC4VF4-NEXT: [[TMP37]] = select <4 x i1> [[TMP21]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
|
|
; IC4VF4-NEXT: [[TMP30]] = or <4 x i1> [[VEC_PHI4]], [[TMP28]]
|
|
; IC4VF4-NEXT: [[TMP31]] = or <4 x i1> [[VEC_PHI5]], [[TMP29]]
|
|
; IC4VF4-NEXT: [[TMP32]] = or <4 x i1> [[VEC_PHI6]], [[TMP20]]
|
|
; IC4VF4-NEXT: [[TMP33]] = or <4 x i1> [[VEC_PHI7]], [[TMP21]]
|
|
; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
|
; IC4VF4-NEXT: [[TMP42]] = sub <4 x i64> [[STEP_ADD_3]], splat (i64 4)
|
|
; IC4VF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; IC4VF4-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
|
|
; IC4VF4: [[MIDDLE_BLOCK]]:
|
|
; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP34]], <4 x i64> [[TMP35]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX23:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP36]])
|
|
; IC4VF4-NEXT: [[RDX_MINMAX25:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX23]], <4 x i64> [[TMP37]])
|
|
; IC4VF4-NEXT: [[TMP39:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX25]])
|
|
; IC4VF4-NEXT: [[TMP38:%.*]] = add nsw i64 [[TMP39]], -1
|
|
; IC4VF4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP31]], [[TMP30]]
|
|
; IC4VF4-NEXT: [[BIN_RDX25:%.*]] = or <4 x i1> [[TMP32]], [[BIN_RDX]]
|
|
; IC4VF4-NEXT: [[BIN_RDX24:%.*]] = or <4 x i1> [[TMP33]], [[BIN_RDX25]]
|
|
; IC4VF4-NEXT: [[TMP40:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX24]])
|
|
; IC4VF4-NEXT: [[TMP41:%.*]] = freeze i1 [[TMP40]]
|
|
; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP41]], i64 [[TMP38]], i64 [[RDX_START]]
|
|
; IC4VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
|
|
; IC4VF4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
|
|
; IC4VF4: [[SCALAR_PH]]:
|
|
; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[ENTRY]] ]
|
|
; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
|
|
; IC4VF4-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF4: [[LOOP]]:
|
|
; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
|
|
; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
|
|
; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
|
|
; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
|
|
; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
|
|
; IC4VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
|
|
; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
|
|
; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
|
|
; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; IC4VF4: [[EXIT]]:
|
|
; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF4-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_non_const_start(
|
|
; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
|
|
; IC4VF1-NEXT: [[ENTRY:.*]]:
|
|
; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
|
|
; IC4VF1-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 1)
|
|
; IC4VF1-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[UMIN]]
|
|
; IC4VF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 4
|
|
; IC4VF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[EXIT:.*]], label %[[VECTOR_PH:.*]]
|
|
; IC4VF1: [[VECTOR_PH]]:
|
|
; IC4VF1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
|
|
; IC4VF1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
|
|
; IC4VF1-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[N_VEC]]
|
|
; IC4VF1-NEXT: br label %[[LOOP:.*]]
|
|
; IC4VF1: [[LOOP]]:
|
|
; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[COND:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP35:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP36:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP37:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI5:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI6:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[VEC_PHI7:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[LOOP]] ]
|
|
; IC4VF1-NEXT: [[IV:%.*]] = sub i64 [[N]], [[INDEX]]
|
|
; IC4VF1-NEXT: [[TMP3:%.*]] = add i64 [[IV]], -1
|
|
; IC4VF1-NEXT: [[TMP4:%.*]] = add i64 [[IV]], -2
|
|
; IC4VF1-NEXT: [[TMP5:%.*]] = add i64 [[IV]], -3
|
|
; IC4VF1-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[IV]], -1
|
|
; IC4VF1-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1
|
|
; IC4VF1-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP4]], -1
|
|
; IC4VF1-NEXT: [[TMP9:%.*]] = add nsw i64 [[TMP5]], -1
|
|
; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
|
|
; IC4VF1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
|
|
; IC4VF1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
|
|
; IC4VF1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]]
|
|
; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
|
|
; IC4VF1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
|
|
; IC4VF1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
|
|
; IC4VF1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP13]], align 8
|
|
; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
|
|
; IC4VF1-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP7]]
|
|
; IC4VF1-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP8]]
|
|
; IC4VF1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP9]]
|
|
; IC4VF1-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
|
|
; IC4VF1-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP19]], align 8
|
|
; IC4VF1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP20]], align 8
|
|
; IC4VF1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP21]], align 8
|
|
; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
|
|
; IC4VF1-NEXT: [[TMP27:%.*]] = icmp sgt i64 [[TMP15]], [[TMP23]]
|
|
; IC4VF1-NEXT: [[TMP28:%.*]] = icmp sgt i64 [[TMP16]], [[TMP24]]
|
|
; IC4VF1-NEXT: [[TMP29:%.*]] = icmp sgt i64 [[TMP17]], [[TMP25]]
|
|
; IC4VF1-NEXT: [[TMP30]] = or i1 [[VEC_PHI4]], [[CMP_A_B]]
|
|
; IC4VF1-NEXT: [[TMP31]] = or i1 [[VEC_PHI5]], [[TMP27]]
|
|
; IC4VF1-NEXT: [[TMP32]] = or i1 [[VEC_PHI6]], [[TMP28]]
|
|
; IC4VF1-NEXT: [[TMP33]] = or i1 [[VEC_PHI7]], [[TMP29]]
|
|
; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
|
|
; IC4VF1-NEXT: [[TMP35]] = select i1 [[TMP27]], i64 [[TMP7]], i64 [[VEC_PHI1]]
|
|
; IC4VF1-NEXT: [[TMP36]] = select i1 [[TMP28]], i64 [[TMP8]], i64 [[VEC_PHI2]]
|
|
; IC4VF1-NEXT: [[TMP37]] = select i1 [[TMP29]], i64 [[TMP9]], i64 [[VEC_PHI3]]
|
|
; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; IC4VF1-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; IC4VF1-NEXT: br i1 [[TMP38]], label %[[MIDDLE_BLOCK:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
|
|
; IC4VF1: [[MIDDLE_BLOCK]]:
|
|
; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[COND]], i64 [[TMP35]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX9:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP36]])
|
|
; IC4VF1-NEXT: [[RDX_MINMAX11:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX9]], i64 [[TMP37]])
|
|
; IC4VF1-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP31]], [[TMP30]]
|
|
; IC4VF1-NEXT: [[BIN_RDX10:%.*]] = or i1 [[TMP32]], [[BIN_RDX]]
|
|
; IC4VF1-NEXT: [[BIN_RDX11:%.*]] = or i1 [[TMP33]], [[BIN_RDX10]]
|
|
; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = freeze i1 [[BIN_RDX11]]
|
|
; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX11]], i64 [[RDX_START]]
|
|
; IC4VF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
|
|
; IC4VF1-NEXT: br i1 [[CMP_N]], label %[[EXIT1:.*]], label %[[EXIT]]
|
|
; IC4VF1: [[EXIT]]:
|
|
; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[ENTRY]] ]
|
|
; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_START]], %[[ENTRY]] ]
|
|
; IC4VF1-NEXT: br label %[[LOOP1:.*]]
|
|
; IC4VF1: [[LOOP1]]:
|
|
; IC4VF1-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT1:%.*]], %[[LOOP1]] ], [ [[BC_RESUME_VAL]], %[[EXIT]] ]
|
|
; IC4VF1-NEXT: [[RDX1:%.*]] = phi i64 [ [[COND1:%.*]], %[[LOOP1]] ], [ [[BC_MERGE_RDX]], %[[EXIT]] ]
|
|
; IC4VF1-NEXT: [[IV_NEXT1]] = add nsw i64 [[IV1]], -1
|
|
; IC4VF1-NEXT: [[GEP_A_IV1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT1]]
|
|
; IC4VF1-NEXT: [[LD_A1:%.*]] = load i64, ptr [[GEP_A_IV1]], align 8
|
|
; IC4VF1-NEXT: [[GEP_B_IV1:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT1]]
|
|
; IC4VF1-NEXT: [[LD_B1:%.*]] = load i64, ptr [[GEP_B_IV1]], align 8
|
|
; IC4VF1-NEXT: [[CMP_A_B1:%.*]] = icmp sgt i64 [[LD_A1]], [[LD_B1]]
|
|
; IC4VF1-NEXT: [[COND1]] = select i1 [[CMP_A_B1]], i64 [[IV_NEXT1]], i64 [[RDX1]]
|
|
; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV1]], 1
|
|
; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[LOOP1]], label %[[EXIT1]], !llvm.loop [[LOOP9:![0-9]+]]
|
|
; IC4VF1: [[EXIT1]]:
|
|
; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND1]], %[[LOOP1]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
|
|
; IC4VF1-NEXT: ret i64 [[COND_LCSSA]]
|
|
;
|
|
entry:
|
|
br label %loop
|
|
|
|
loop:
|
|
%iv = phi i64 [ %iv.next, %loop ], [ %n, %entry ]
|
|
%rdx = phi i64 [ %cond, %loop ], [ %rdx.start, %entry ]
|
|
%iv.next = add nsw i64 %iv, -1
|
|
%gep.a.iv = getelementptr inbounds i64, ptr %a, i64 %iv.next
|
|
%ld.a = load i64, ptr %gep.a.iv, align 8
|
|
%gep.b.iv = getelementptr inbounds i64, ptr %b, i64 %iv.next
|
|
%ld.b = load i64, ptr %gep.b.iv, align 8
|
|
%cmp.a.b = icmp sgt i64 %ld.a, %ld.b
|
|
%cond = select i1 %cmp.a.b, i64 %iv.next, i64 %rdx
|
|
%exit.cond = icmp ugt i64 %iv, 1
|
|
br i1 %exit.cond, label %loop, label %exit
|
|
|
|
exit:
|
|
ret i64 %cond
|
|
}
|