Files
David Sherwood 1d9775f684 [LV] Change VPLane::getAsRuntimeExpr to use constant 64-bit indices (#193206)
The canonical form preferred by instcombine is to use 64-bit values for
the index when it is a constant. We should try to do the same where
possible in the loop vectoriser as this reduces churn in the compiler.

It also makes other work easier, such as removing extra unnecessary
passes on the RUN line in the test directory which I plan to do
afterwards.
2026-04-22 11:33:42 +01:00

248 lines
10 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @test(i32 %tc, ptr %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT: br label [[FOR_BODY_LR_PH_I_I_I:%.*]]
; CHECK: for.body.lr.ph.i.i.i:
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TC:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_I_I_I:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH_I_I_I]] ]
; CHECK-NEXT: br label [[FOR_BODY_I_I_I:%.*]]
; CHECK: for.body.i.i.i:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC_I_I_I:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: br label [[FOR_INC_I_I_I]]
; CHECK: for.inc.i.i.i:
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[TC]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY_I_I_I]], label [[FOR_END_I_I_I]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end.i.i.i:
; CHECK-NEXT: [[LCSSA:%.*]] = phi ptr [ [[P:%.*]], [[FOR_INC_I_I_I]] ], [ [[P]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: unreachable
;
br label %for.body.lr.ph.i.i.i
for.body.lr.ph.i.i.i:
br label %for.body.i.i.i
for.body.i.i.i:
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.i.i.i ], [ 0, %for.body.lr.ph.i.i.i ]
br label %for.inc.i.i.i
for.inc.i.i.i:
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp ne i32 %lftr.wideiv, %tc
br i1 %exitcond, label %for.body.i.i.i, label %for.end.i.i.i
for.end.i.i.i:
%lcssa = phi ptr [ %p, %for.inc.i.i.i ]
unreachable
}
; PR16139
define void @test2(ptr %x) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: indirectbr ptr [[X:%.*]], [label [[L0:%.*]], label [[L1:%.*]]]
; CHECK: L0:
; CHECK-NEXT: br label [[L0]]
; CHECK: L1:
; CHECK-NEXT: ret void
;
entry:
indirectbr ptr %x, [ label %L0, label %L1 ]
L0:
br label %L0
L1:
ret void
}
; This loop has different uniform instructions before and after LCSSA.
define void @test3(ptr %p) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD41:%.*]] = add i32 3, 3
; CHECK-NEXT: [[IDXPROM4736:%.*]] = zext i32 [[ADD41]] to i64
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P:%.*]], i64 0, i64 6
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX48]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 7
; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 8
; CHECK-NEXT: store i8 0, ptr [[TMP17]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P]], i64 0, i64 9
; CHECK-NEXT: store i8 0, ptr [[TMP19]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[WHILE_END:%.*]]
; CHECK: while.end:
; CHECK-NEXT: [[ADD58:%.*]] = add i32 8, 4
; CHECK-NEXT: ret void
;
entry:
%add41 = add i32 3, 3
%idxprom4736 = zext i32 %add41 to i64
br label %while.body
while.body:
%idxprom4738 = phi i64 [ %idxprom47, %while.body ], [ %idxprom4736, %entry ]
%pos.337 = phi i32 [ %inc46, %while.body ], [ %add41, %entry ]
%inc46 = add i32 %pos.337, 1
%arrayidx48 = getelementptr inbounds [1024 x i8], ptr %p, i64 0, i64 %idxprom4738
store i8 0, ptr %arrayidx48, align 1
%and43 = and i32 %inc46, 3
%cmp44 = icmp eq i32 %and43, 0
%idxprom47 = zext i32 %inc46 to i64
br i1 %cmp44, label %while.end, label %while.body
while.end:
%add58 = add i32 %inc46, 4
ret void
}
; Make sure LV doesn't crash on IR where some LCSSA uses are unreachable.
define i32 @pr57508(ptr %src) {
; CHECK-LABEL: @pr57508(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2000
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ]
; CHECK-NEXT: [[LOCAL:%.*]] = phi i32 [ [[LOCAL_NEXT:%.*]], [[LOOP]] ], [ 2000, [[SCALAR_PH]] ]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[LOCAL_NEXT]] = add i32 [[LOCAL]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 2000
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.exit:
; CHECK-NEXT: unreachable
; CHECK: bb:
; CHECK-NEXT: [[LOCAL_USE:%.*]] = add i32 poison, 1
; CHECK-NEXT: ret i32 [[LOCAL_USE]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
%local = phi i32 [ %local.next, %loop ], [ 0, %entry ]
%iv.next = add nuw nsw i64 %iv, 1
%local.next = add i32 %local, 1
%ec = icmp eq i64 %iv, 2000
br i1 %ec, label %loop.exit, label %loop
loop.exit:
unreachable
bb:
%local.use = add i32 %local, 1
ret i32 %local.use
}
; Test that exit phi extracts are inserted after the definition of the value
; being extracted. This used to crash due to dominance violation when the sunk
; select was generated after the extractelement for the exit phi.
define i32 @exit_phi_sunk_def(ptr noalias %src, ptr noalias %dst) {
; CHECK-LABEL: @exit_phi_sunk_def(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: store i32 1, ptr [[DST:%.*]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: store i32 2, ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: store i32 3, ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br i1 false, label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: store i32 4, ptr [[DST]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[SEL]], i64 0
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[EXT]]
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%ld = load i32, ptr %src, align 4
%cmp = icmp eq i32 %ld, 0
%sel = select i1 %cmp, i32 0, i32 2
%iv.next = add nuw nsw i32 %iv, 1
store i32 %iv.next, ptr %dst, align 4
%exit.cond = icmp ult i32 %iv, 2
br i1 %exit.cond, label %loop, label %exit
exit:
ret i32 %sel
}