Files
Florian Hahn 42166b6dcf [LV] Update forced epilogue VF options to allow different VFs than main. (#190393)
Previously, epilogue vector factors forced via the command line options
were required to match the forced main VF (or the VF to be built in general).
This led to a number of awkward tests, where we ended up with dead
epilogue vector loops.

Update the logic to build an additional VPlan with the epilogue vector
factor, and require the provided epilogue VF to be < IC * MainLoopVF.
Otherwise, epilogue vectorization is skipped.

This only impacts the forced epilogue VF option used for testing. It
ensures epilogue tests cover more realistic scenarios and makes them
more robust w.r.t. additional VPlan-based folding.

PR: https://github.com/llvm/llvm-project/pull/190393
2026-04-09 11:50:48 +00:00

155 lines
9.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
; First invocation: main vector loop forced to width 8 with interleave count 1,
; epilogue vector loop forced to width 4.
; NOTE(review): the check prefix used here still reads "MAINVF4" although this
; invocation forces a vector width of 8 - confirm whether the prefix (and all of
; its check lines) should be renamed.
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -enable-epilogue-vectorization \
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
; Second invocation: main vector loop forced to width 4 with interleave count 2,
; epilogue vector loop forced to width 4.
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
; @f0: on every iteration, stores the current i32 counter through a pointer
; computed from %p and an i8-typed index. The loop is only entered when %len is
; positive (signed compare). The function and both of its conditional branches
; carry profile metadata (!0, !1 and !2 respectively).
; %n is not referenced anywhere in the body.
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
; MAINVF4IC1_EPI4-LABEL: define void @f0(
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]]
; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
; MAINVF4IC1_EPI4: [[LOOP]]:
; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]]
; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
; MAINVF4IC1_EPI4: br label %[[EXIT]]
; MAINVF4IC1_EPI4: [[EXIT]]:
;
; MAINVF4IC2_EPI4-LABEL: define void @f0(
; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]]
; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
; MAINVF4IC2_EPI4: [[LOOP]]:
; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]]
; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
; MAINVF4IC2_EPI4: br label %[[EXIT]]
; MAINVF4IC2_EPI4: [[EXIT]]:
;
entry:
; Guard: skip the loop entirely unless %len is greater than 0 (signed compare).
%cmp.entry = icmp sgt i32 %len, 0
br i1 %cmp.entry, label %loop, label %exit, !prof !1
loop:
; Two induction variables advance in lockstep from 0: the i8 one is the GEP
; index, the i32 one is both the stored value and the operand of the exit test.
; NOTE(review): the expected output above contains a runtime check block
; (captured as VECTOR_SCEVCHECK) that compares %len against 255; presumably this
; guards against the narrower i8 index wrapping - confirm.
%i8 = phi i8 [0, %entry], [%i8.inc, %loop]
%i32 = phi i32 [0, %entry], [%i32.inc, %loop]
%ptr = getelementptr inbounds i32, ptr %p, i8 %i8
store i32 %i32, ptr %ptr
%i8.inc = add i8 %i8, 1
%i32.inc = add i32 %i32, 1
; The exit test uses the pre-increment %i32 (not %i32.inc) against %len.
%cmp.loop = icmp ult i32 %i32, %len
br i1 %cmp.loop, label %loop, label %exit, !prof !2
exit:
ret void
}
; Profile metadata attached to @f0.
; !0: entry count attached to the definition of @f0.
!0 = !{!"function_entry_count", i64 13}
; !1: weights for the guard branch in the entry block (successor order: %loop, %exit).
!1 = !{!"branch_weights", i32 12, i32 1}
; !2: weights for the conditional branch ending the loop block (successor order: %loop, %exit).
!2 = !{!"branch_weights", i32 1234, i32 1}
;.
; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
; MAINVF4IC1_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154}
; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7}
; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4}
; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 0}
; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]}
; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0}
; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3}
; MAINVF4IC1_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1}
; MAINVF4IC1_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]}
; MAINVF4IC1_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3}
;.
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
; MAINVF4IC2_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154}
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7}
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4}
; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 0}
; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]}
; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0}
; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3}
; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1}
; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]}
; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3}
;.