Files
llvm-project/llvm/test/Transforms/LoopVectorize/predicated-inductions.ll
Ramkumar Ramachandra e6d46f16ab [VPlan] Expand DerivedIV into executable recipes (#187589)
This allows us to strip DerivedIVRecipe::execute, and remove the
dependency on emitTransformedIndex. It allows us to benefit from
existing simplifications in VPlan.
2026-04-29 17:27:11 +01:00

719 lines
42 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 < %s | FileCheck --check-prefixes=COMMON,CHECK %s
; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \
; RUN: -vectorize-scev-check-threshold=0 %s | FileCheck --check-prefixes=COMMON,THRESHOLD0 %s
; RUN: opt -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \
; RUN: -vectorize-scev-check-threshold=1 %s | FileCheck --check-prefixes=COMMON,THRESHOLD1 %s
; An i64 primary IV runs alongside a narrow i16 IV whose zext feeds a
; first-order recurrence (%prev). The recurrence is used both as a store
; address inside the loop and as the loop live-out (%result). The checked
; vector.scevcheck guards trip-count-1 <= 65535 so the i16 IV cannot wrap.
define i64 @predicated_iv_with_liveout(ptr %dst, i64 %n) {
; COMMON-LABEL: define i64 @predicated_iv_with_liveout(
; COMMON-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; COMMON-NEXT: [[ENTRY:.*]]:
; COMMON-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; COMMON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX1]], 4
; COMMON-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; COMMON: [[VECTOR_SCEVCHECK]]:
; COMMON-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; COMMON-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; COMMON-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 65535
; COMMON-NEXT: br i1 [[TMP1]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; COMMON: [[VECTOR_PH]]:
; COMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX1]], 4
; COMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX1]], [[N_MOD_VF]]
; COMMON-NEXT: [[TMP2:%.*]] = trunc i64 [[N_VEC]] to i16
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
; COMMON: [[VECTOR_BODY]]:
; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COMMON-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COMMON-NEXT: [[VEC_IND2:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ]
; COMMON-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
; COMMON-NEXT: [[TMP3:%.*]] = add <4 x i16> [[VEC_IND2]], splat (i16 1)
; COMMON-NEXT: [[TMP4]] = zext <4 x i16> [[TMP3]] to <4 x i64>
; COMMON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; COMMON-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i64 0
; COMMON-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP6]]
; COMMON-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8
; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COMMON-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; COMMON-NEXT: [[VEC_IND_NEXT3]] = add <4 x i16> [[VEC_IND2]], splat (i16 4)
; COMMON-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COMMON-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; COMMON: [[MIDDLE_BLOCK]]:
; COMMON-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP4]], i64 3
; COMMON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX1]], [[N_VEC]]
; COMMON-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COMMON: [[SCALAR_PH]]:
; COMMON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; COMMON-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i16 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; COMMON-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; COMMON-NEXT: br label %[[LOOP:.*]]
; COMMON: [[LOOP]]:
; COMMON-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[DEAD_IV:%.*]] = phi i16 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[DEAD_IV_NEXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[PREV:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[EXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; COMMON-NEXT: [[DEAD_IV_NEXT]] = add i16 [[DEAD_IV]], 1
; COMMON-NEXT: [[EXT]] = zext i16 [[DEAD_IV_NEXT]] to i64
; COMMON-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[PREV]]
; COMMON-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
; COMMON-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; COMMON-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
; COMMON: [[EXIT]]:
; COMMON-NEXT: [[RESULT:%.*]] = phi i64 [ [[EXT]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ]
; COMMON-NEXT: ret i64 [[RESULT]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%dead.iv = phi i16 [ 0, %entry ], [ %dead.iv.next, %loop ]
; %prev carries %ext (zext of the incremented i16 IV) from the previous
; iteration: a first-order recurrence used as a store index and as live-out.
%prev = phi i64 [ 0, %entry ], [ %ext, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%dead.iv.next = add i16 %dead.iv, 1
%ext = zext i16 %dead.iv.next to i64
%gep = getelementptr inbounds i64, ptr %dst, i64 %prev
store i64 %iv, ptr %gep, align 8
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
%result = phi i64 [ %ext, %loop ]
ret i64 %result
}
; The i16 IV is only used via the zext of its increment, which is the value
; stored each iteration. The checks show it is widened as a <4 x i16> vector
; induction and that no vector.scevcheck block is emitted: entry branches
; straight to vector.ph after the minimum-iterations check.
define void @dead_predicated_iv1(ptr %dst, i64 %n) {
; COMMON-LABEL: define void @dead_predicated_iv1(
; COMMON-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; COMMON-NEXT: [[ENTRY:.*]]:
; COMMON-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; COMMON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; COMMON-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; COMMON: [[VECTOR_PH]]:
; COMMON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
; COMMON-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
; COMMON-NEXT: [[TMP0:%.*]] = trunc i64 [[N_VEC]] to i16
; COMMON-NEXT: br label %[[VECTOR_BODY:.*]]
; COMMON: [[VECTOR_BODY]]:
; COMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COMMON-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; COMMON-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
; COMMON-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64>
; COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; COMMON-NEXT: store <4 x i64> [[TMP2]], ptr [[TMP3]], align 8
; COMMON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; COMMON-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
; COMMON-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; COMMON-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; COMMON: [[MIDDLE_BLOCK]]:
; COMMON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; COMMON-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; COMMON: [[SCALAR_PH]]:
; COMMON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; COMMON-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i16 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; COMMON-NEXT: br label %[[LOOP:.*]]
; COMMON: [[LOOP]]:
; COMMON-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[DEAD_IV:%.*]] = phi i16 [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[DEAD_IV_NEXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; COMMON-NEXT: [[DEAD_IV_NEXT]] = add i16 [[DEAD_IV]], 1
; COMMON-NEXT: [[EXT:%.*]] = zext i16 [[DEAD_IV_NEXT]] to i64
; COMMON-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; COMMON-NEXT: store i64 [[EXT]], ptr [[GEP]], align 8
; COMMON-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; COMMON-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
; COMMON: [[EXIT]]:
; COMMON-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The i16 IV's increment is zero-extended and stored; the phi itself has no
; other use.
%dead.iv = phi i16 [ 0, %entry ], [ %dead.iv.next, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%dead.iv.next = add i16 %dead.iv, 1
%ext = zext i16 %dead.iv.next to i64
%gep = getelementptr inbounds i64, ptr %dst, i64 %iv
store i64 %ext, ptr %gep, align 8
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
ret void
}
; Dead i32 IV of the form next = (iv & 255) + 9. Vectorizing requires a SCEV
; predicate built from @llvm.umul.with.overflow.i8 (step 9) plus a
; trip-count-1 <= 255 bound, as shown in vector.scevcheck. With
; -vectorize-scev-check-threshold=0 the predicate budget is exceeded and the
; loop stays scalar (THRESHOLD0); the default and threshold=1 runs vectorize
; (CHECK, THRESHOLD1).
define void @dead_predicated_iv2(ptr %dst, i64 %n) {
; CHECK-LABEL: define void @dead_predicated_iv2(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX1]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 9, i8 [[TMP1]])
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX1]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX1]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[DOTCAST]], 9
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP5]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[DEAD_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[DEAD_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[DEAD_IV]], 255
; CHECK-NEXT: [[DEAD_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; THRESHOLD0-LABEL: define void @dead_predicated_iv2(
; THRESHOLD0-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; THRESHOLD0-NEXT: [[ENTRY:.*]]:
; THRESHOLD0-NEXT: br label %[[LOOP:.*]]
; THRESHOLD0: [[LOOP]]:
; THRESHOLD0-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[DEAD_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DEAD_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD0-NEXT: [[MASKED:%.*]] = and i32 [[DEAD_IV]], 255
; THRESHOLD0-NEXT: [[DEAD_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; THRESHOLD0-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; THRESHOLD0-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
; THRESHOLD0-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD0-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD0: [[EXIT]]:
; THRESHOLD0-NEXT: ret void
;
; THRESHOLD1-LABEL: define void @dead_predicated_iv2(
; THRESHOLD1-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; THRESHOLD1-NEXT: [[ENTRY:.*]]:
; THRESHOLD1-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; THRESHOLD1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX1]], 4
; THRESHOLD1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; THRESHOLD1: [[VECTOR_SCEVCHECK]]:
; THRESHOLD1-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; THRESHOLD1-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; THRESHOLD1-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
; THRESHOLD1-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 9, i8 [[TMP1]])
; THRESHOLD1-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; THRESHOLD1-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 255
; THRESHOLD1-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP2]]
; THRESHOLD1-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; THRESHOLD1: [[VECTOR_PH]]:
; THRESHOLD1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX1]], 4
; THRESHOLD1-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX1]], [[N_MOD_VF]]
; THRESHOLD1-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; THRESHOLD1-NEXT: [[TMP4:%.*]] = mul i32 [[DOTCAST]], 9
; THRESHOLD1-NEXT: br label %[[VECTOR_BODY:.*]]
; THRESHOLD1: [[VECTOR_BODY]]:
; THRESHOLD1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; THRESHOLD1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; THRESHOLD1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
; THRESHOLD1-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP5]], align 8
; THRESHOLD1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; THRESHOLD1-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 4)
; THRESHOLD1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; THRESHOLD1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; THRESHOLD1: [[MIDDLE_BLOCK]]:
; THRESHOLD1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX1]], [[N_VEC]]
; THRESHOLD1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; THRESHOLD1: [[SCALAR_PH]]:
; THRESHOLD1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; THRESHOLD1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
; THRESHOLD1-NEXT: br label %[[LOOP:.*]]
; THRESHOLD1: [[LOOP]]:
; THRESHOLD1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[DEAD_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[DEAD_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD1-NEXT: [[MASKED:%.*]] = and i32 [[DEAD_IV]], 255
; THRESHOLD1-NEXT: [[DEAD_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; THRESHOLD1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
; THRESHOLD1-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
; THRESHOLD1-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD1-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; THRESHOLD1: [[EXIT]]:
; THRESHOLD1-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%dead.iv = phi i32 [ 0, %entry ], [ %dead.next, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
; The masked step keeps %dead.next within [9, 264]; only the phi/mask/add
; chain uses these values, so the IV is dead apart from its own update.
%masked = and i32 %dead.iv, 255
%dead.next = add nuw nsw i32 %masked, 9
%gep = getelementptr inbounds i64, ptr %dst, i64 %iv
store i64 %iv, ptr %gep, align 8
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
ret void
}
; Not vectorized under any RUN configuration: the COMMON checks require the
; scalar loop to be emitted unchanged. The loop combines a masked predicated
; IV with a live-out (%result) and a symbolic-stride store that would add
; LAI SCEV predicates — presumably the combined predicate cost blocks
; vectorization here; confirm against the pass's predicate budget.
define i32 @predicated_iv_liveout_with_lai_preds(ptr %dst, ptr %src, i64 %stride, i64 %n) {
; COMMON-LABEL: define i32 @predicated_iv_liveout_with_lai_preds(
; COMMON-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; COMMON-NEXT: [[ENTRY:.*]]:
; COMMON-NEXT: br label %[[LOOP:.*]]
; COMMON: [[LOOP]]:
; COMMON-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[PRED_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT:%.*]], %[[LOOP]] ]
; COMMON-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; COMMON-NEXT: [[MASKED:%.*]] = and i32 [[PRED_IV]], 255
; COMMON-NEXT: [[PRED_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; COMMON-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
; COMMON-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; COMMON-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; COMMON-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[STRIDE_OFF]]
; COMMON-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; COMMON-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; COMMON-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; COMMON: [[EXIT]]:
; COMMON-NEXT: [[RESULT:%.*]] = phi i32 [ [[PRED_NEXT]], %[[LOOP]] ]
; COMMON-NEXT: ret i32 [[RESULT]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%pred.iv = phi i32 [ 0, %entry ], [ %pred.next, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%masked = and i32 %pred.iv, 255
%pred.next = add nuw nsw i32 %masked, 9
; Symbolic stride access generates LAI SCEV predicates (stride == 1 check).
%gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
%val = load i32, ptr %gep.src, align 4
%stride.off = mul nsw i64 %iv, %stride
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 %stride.off
store i32 %val, ptr %gep.dst, align 4
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
; %pred.next is a loop live-out, unlike the dead IVs in the tests above.
%result = phi i32 [ %pred.next, %loop ]
ret i32 %result
}
; Same masked IV as above plus a symbolic-stride load. At the default
; threshold (CHECK) the loop vectorizes behind both a vector.scevcheck (IV
; overflow predicates plus a stride == 1 identity check) and a
; vector.memcheck (dst/src distance >= 16). With
; -vectorize-scev-check-threshold=0 or =1 the predicate count is over
; budget and the loop stays scalar (THRESHOLD0, THRESHOLD1).
define void @total_complexity_exceeds_threshold(ptr %dst, ptr %src, i64 %stride, i64 %n) {
; CHECK-LABEL: define void @total_complexity_exceeds_threshold(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[SRC2:%.*]] = ptrtoaddr ptr [[SRC]] to i64
; CHECK-NEXT: [[DST1:%.*]] = ptrtoaddr ptr [[DST]] to i64
; CHECK-NEXT: [[SMAX3:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 9, i8 [[TMP1]])
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP2]]
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE]], 1
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[IDENT_CHECK]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX3]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[DOTCAST]], 9
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[DEAD_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[DEAD_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[DEAD_IV]], 255
; CHECK-NEXT: [[DEAD_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; CHECK-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[STRIDE_OFF]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; THRESHOLD0-LABEL: define void @total_complexity_exceeds_threshold(
; THRESHOLD0-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; THRESHOLD0-NEXT: [[ENTRY:.*]]:
; THRESHOLD0-NEXT: br label %[[LOOP:.*]]
; THRESHOLD0: [[LOOP]]:
; THRESHOLD0-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[DEAD_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DEAD_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD0-NEXT: [[MASKED:%.*]] = and i32 [[DEAD_IV]], 255
; THRESHOLD0-NEXT: [[DEAD_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; THRESHOLD0-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; THRESHOLD0-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[STRIDE_OFF]]
; THRESHOLD0-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; THRESHOLD0-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; THRESHOLD0-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; THRESHOLD0-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD0-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD0: [[EXIT]]:
; THRESHOLD0-NEXT: ret void
;
; THRESHOLD1-LABEL: define void @total_complexity_exceeds_threshold(
; THRESHOLD1-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; THRESHOLD1-NEXT: [[ENTRY:.*]]:
; THRESHOLD1-NEXT: br label %[[LOOP:.*]]
; THRESHOLD1: [[LOOP]]:
; THRESHOLD1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[DEAD_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[DEAD_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD1-NEXT: [[MASKED:%.*]] = and i32 [[DEAD_IV]], 255
; THRESHOLD1-NEXT: [[DEAD_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; THRESHOLD1-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; THRESHOLD1-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[STRIDE_OFF]]
; THRESHOLD1-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; THRESHOLD1-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; THRESHOLD1-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; THRESHOLD1-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD1-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD1: [[EXIT]]:
; THRESHOLD1-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%dead.iv = phi i32 [ 0, %entry ], [ %dead.next, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%masked = and i32 %dead.iv, 255
%dead.next = add nuw nsw i32 %masked, 9
; Symbolic stride access to generate LAI predicates.
%stride.off = mul nsw i64 %iv, %stride
%gep.src = getelementptr inbounds i32, ptr %src, i64 %stride.off
%val = load i32, ptr %gep.src, align 4
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 %val, ptr %gep.dst, align 4
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
ret void
}
; Structurally the same loop as @total_complexity_exceeds_threshold and the
; same outcomes: the default run (CHECK) vectorizes behind a
; vector.scevcheck (IV overflow predicates + stride == 1 identity check) and
; a vector.memcheck, while both explicit thresholds (THRESHOLD0, THRESHOLD1)
; leave the loop scalar. Note the autogenerated CHECK lines name the entry
; block SCALAR_PH1 and the vector body LOOP1 here.
define void @combined_lai_iv_complexity(ptr %dst, ptr %src, i64 %stride, i64 %n) {
; CHECK-LABEL: define void @combined_lai_iv_complexity(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[SCALAR_PH1:.*]]:
; CHECK-NEXT: [[SRC2:%.*]] = ptrtoaddr ptr [[SRC]] to i64
; CHECK-NEXT: [[DST1:%.*]] = ptrtoaddr ptr [[DST]] to i64
; CHECK-NEXT: [[SMAX3:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 9, i8 [[TMP1]])
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP2]]
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE]], 1
; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[IDENT_CHECK]]
; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX3]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[DOTCAST]], 9
; CHECK-NEXT: br label %[[LOOP1:.*]]
; CHECK: [[LOOP1]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP1]] ]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[LOOP1]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[SCALAR_PH1]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[SCALAR_PH1]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[PRED_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[PRED_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[MASKED:%.*]] = and i32 [[PRED_IV]], 255
; CHECK-NEXT: [[PRED_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; CHECK-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[STRIDE_OFF]]
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; THRESHOLD0-LABEL: define void @combined_lai_iv_complexity(
; THRESHOLD0-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; THRESHOLD0-NEXT: [[ENTRY:.*]]:
; THRESHOLD0-NEXT: br label %[[LOOP:.*]]
; THRESHOLD0: [[LOOP]]:
; THRESHOLD0-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[PRED_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD0-NEXT: [[MASKED:%.*]] = and i32 [[PRED_IV]], 255
; THRESHOLD0-NEXT: [[PRED_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; THRESHOLD0-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; THRESHOLD0-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[STRIDE_OFF]]
; THRESHOLD0-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; THRESHOLD0-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; THRESHOLD0-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; THRESHOLD0-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD0-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD0: [[EXIT]]:
; THRESHOLD0-NEXT: ret void
;
; THRESHOLD1-LABEL: define void @combined_lai_iv_complexity(
; THRESHOLD1-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[STRIDE:%.*]], i64 [[N:%.*]]) {
; THRESHOLD1-NEXT: [[ENTRY:.*]]:
; THRESHOLD1-NEXT: br label %[[LOOP:.*]]
; THRESHOLD1: [[LOOP]]:
; THRESHOLD1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[PRED_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD1-NEXT: [[MASKED:%.*]] = and i32 [[PRED_IV]], 255
; THRESHOLD1-NEXT: [[PRED_NEXT]] = add nuw nsw i32 [[MASKED]], 9
; THRESHOLD1-NEXT: [[STRIDE_OFF:%.*]] = mul nsw i64 [[IV]], [[STRIDE]]
; THRESHOLD1-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[STRIDE_OFF]]
; THRESHOLD1-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; THRESHOLD1-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; THRESHOLD1-NEXT: store i32 [[VAL]], ptr [[GEP_DST]], align 4
; THRESHOLD1-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD1-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD1: [[EXIT]]:
; THRESHOLD1-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%pred.iv = phi i32 [ 0, %entry ], [ %pred.next, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
%masked = and i32 %pred.iv, 255
%pred.next = add nuw nsw i32 %masked, 9
; Symbolic stride access generates LAI SCEV predicates (stride == 1 check).
%stride.off = mul nsw i64 %iv, %stride
%gep.src = getelementptr inbounds i32, ptr %src, i64 %stride.off
%val = load i32, ptr %gep.src, align 4
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 %val, ptr %gep.dst, align 4
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
ret void
}
; Two independent "predicated" recurrences of the form x = (x & mask) + step
; (mask 255 / step 9, and mask 127 / step 5), with each post-increment value
; stored to a distinct destination every iteration. Vectorization must emit
; one SCEV overflow predicate per recurrence (the i8 and i7
; umul.with.overflow checks in the CHECK lines below) plus a runtime alias
; check between dst1 and dst2 before entering the vector loop.
define void @two_used_predicated_ivs(ptr %dst1, ptr %dst2, i64 %n) {
; CHECK-LABEL: define void @two_used_predicated_ivs(
; CHECK-SAME: ptr [[DST1:%.*]], ptr [[DST2:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[DST15:%.*]] = ptrtoaddr ptr [[DST1]] to i64
; CHECK-NEXT: [[DST24:%.*]] = ptrtoaddr ptr [[DST2]] to i64
; CHECK-NEXT: [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8
; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 9, i8 [[TMP1]])
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP0]], 255
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP0]] to i7
; CHECK-NEXT: [[MUL1:%.*]] = call { i7, i1 } @llvm.umul.with.overflow.i7(i7 5, i7 [[TMP4]])
; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i7, i1 } [[MUL1]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 127
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[MUL_OVERFLOW3]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP3]], [[TMP6]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[DST24]], [[DST15]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], 16
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX6]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX6]], [[N_MOD_VF]]
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[DOTCAST]], 9
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[DOTCAST]], 5
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 9, i32 18, i32 27>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ <i32 0, i32 5, i32 10, i32 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 9)
; CHECK-NEXT: [[TMP12:%.*]] = add nuw nsw <4 x i32> [[VEC_IND8]], splat (i32 5)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST1]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST2]], i64 [[INDEX]]
; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 36)
; CHECK-NEXT: [[VEC_IND_NEXT9]] = add nuw nsw <4 x i32> [[VEC_IND8]], splat (i32 20)
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i32 [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[PRED_IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL9]], %[[SCALAR_PH]] ], [ [[PRED_NEXT1:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[PRED_IV2:%.*]] = phi i32 [ [[BC_RESUME_VAL10]], %[[SCALAR_PH]] ], [ [[PRED_NEXT2:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[MASKED1:%.*]] = and i32 [[PRED_IV1]], 255
; CHECK-NEXT: [[PRED_NEXT1]] = add nuw nsw i32 [[MASKED1]], 9
; CHECK-NEXT: [[MASKED2:%.*]] = and i32 [[PRED_IV2]], 127
; CHECK-NEXT: [[PRED_NEXT2]] = add nuw nsw i32 [[MASKED2]], 5
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[DST1]], i64 [[IV]]
; CHECK-NEXT: store i32 [[PRED_NEXT1]], ptr [[GEP1]], align 4
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[DST2]], i64 [[IV]]
; CHECK-NEXT: store i32 [[PRED_NEXT2]], ptr [[GEP2]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; THRESHOLD0-LABEL: define void @two_used_predicated_ivs(
; THRESHOLD0-SAME: ptr [[DST1:%.*]], ptr [[DST2:%.*]], i64 [[N:%.*]]) {
; THRESHOLD0-NEXT: [[ENTRY:.*]]:
; THRESHOLD0-NEXT: br label %[[LOOP:.*]]
; THRESHOLD0: [[LOOP]]:
; THRESHOLD0-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[PRED_IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT1:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[PRED_IV2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT2:%.*]], %[[LOOP]] ]
; THRESHOLD0-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD0-NEXT: [[MASKED1:%.*]] = and i32 [[PRED_IV1]], 255
; THRESHOLD0-NEXT: [[PRED_NEXT1]] = add nuw nsw i32 [[MASKED1]], 9
; THRESHOLD0-NEXT: [[MASKED2:%.*]] = and i32 [[PRED_IV2]], 127
; THRESHOLD0-NEXT: [[PRED_NEXT2]] = add nuw nsw i32 [[MASKED2]], 5
; THRESHOLD0-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[DST1]], i64 [[IV]]
; THRESHOLD0-NEXT: store i32 [[PRED_NEXT1]], ptr [[GEP1]], align 4
; THRESHOLD0-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[DST2]], i64 [[IV]]
; THRESHOLD0-NEXT: store i32 [[PRED_NEXT2]], ptr [[GEP2]], align 4
; THRESHOLD0-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD0-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD0: [[EXIT]]:
; THRESHOLD0-NEXT: ret void
;
; THRESHOLD1-LABEL: define void @two_used_predicated_ivs(
; THRESHOLD1-SAME: ptr [[DST1:%.*]], ptr [[DST2:%.*]], i64 [[N:%.*]]) {
; THRESHOLD1-NEXT: [[ENTRY:.*]]:
; THRESHOLD1-NEXT: br label %[[LOOP:.*]]
; THRESHOLD1: [[LOOP]]:
; THRESHOLD1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[PRED_IV1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT1:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[PRED_IV2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PRED_NEXT2:%.*]], %[[LOOP]] ]
; THRESHOLD1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; THRESHOLD1-NEXT: [[MASKED1:%.*]] = and i32 [[PRED_IV1]], 255
; THRESHOLD1-NEXT: [[PRED_NEXT1]] = add nuw nsw i32 [[MASKED1]], 9
; THRESHOLD1-NEXT: [[MASKED2:%.*]] = and i32 [[PRED_IV2]], 127
; THRESHOLD1-NEXT: [[PRED_NEXT2]] = add nuw nsw i32 [[MASKED2]], 5
; THRESHOLD1-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[DST1]], i64 [[IV]]
; THRESHOLD1-NEXT: store i32 [[PRED_NEXT1]], ptr [[GEP1]], align 4
; THRESHOLD1-NEXT: [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[DST2]], i64 [[IV]]
; THRESHOLD1-NEXT: store i32 [[PRED_NEXT2]], ptr [[GEP2]], align 4
; THRESHOLD1-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV_NEXT]], [[N]]
; THRESHOLD1-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; THRESHOLD1: [[EXIT]]:
; THRESHOLD1-NEXT: ret void
;
entry:
br label %loop
loop:
; Canonical i64 counter; the only IV that controls the loop exit.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Two predicated recurrences: each masks its previous value before adding a
; constant step, so neither is a plain affine IV on its own.
%pred.iv1 = phi i32 [ 0, %entry ], [ %pred.next1, %loop ]
%pred.iv2 = phi i32 [ 0, %entry ], [ %pred.next2, %loop ]
%iv.next = add nuw nsw i64 %iv, 1
; Recurrence 1: x = (x & 255) + 9.
%masked1 = and i32 %pred.iv1, 255
%pred.next1 = add nuw nsw i32 %masked1, 9
; Recurrence 2: x = (x & 127) + 5.
%masked2 = and i32 %pred.iv2, 127
%pred.next2 = add nuw nsw i32 %masked2, 5
; Both post-increment values are live uses: each is stored every iteration,
; which also forces the runtime alias check between dst1 and dst2.
%gep1 = getelementptr inbounds i32, ptr %dst1, i64 %iv
store i32 %pred.next1, ptr %gep1, align 4
%gep2 = getelementptr inbounds i32, ptr %dst2, i64 %iv
store i32 %pred.next2, ptr %gep2, align 4
%cmp = icmp slt i64 %iv.next, %n
br i1 %cmp, label %loop, label %exit
exit:
ret void
}