Previously, epilogue vector factors forced via command-line options were required to match the forced main VF (or, more generally, the VF to be built). This led to a number of awkward tests in which we ended up with dead epilogue vector loops. Update the logic to build an additional VPlan with the epilogue vector factor, and require the provided epilogue VF to be < IC * MainLoopVF. Otherwise, epilogue vectorization is skipped. This only impacts the forced epilogue VF option used for testing; it ensures that epilogue tests cover more realistic scenarios and makes them more robust w.r.t. additional VPlan-based folding. PR: https://github.com/llvm/llvm-project/pull/190393
155 lines
9.7 KiB
LLVM
155 lines
9.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "br " --filter "^.*:" --filter "icmp" --version 5
|
|
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -enable-epilogue-vectorization \
|
|
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC1_EPI4
|
|
; RUN: opt < %s -S -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-epilogue-vectorization \
|
|
; RUN: -epilogue-vectorization-force-VF=4 | FileCheck %s --check-prefix=MAINVF4IC2_EPI4
|
|
|
|
; Test input: when %len is strictly positive, run a loop that stores the i32
; counter %i32 through a pointer computed from %p and a separate i8 counter
; %i8. The function, the entry guard and the loop latch carry !prof metadata
; (!0, !1 and !2 respectively). The %n parameter is not referenced in the body.
define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 {
|
|
; MAINVF4IC1_EPI4-LABEL: define void @f0(
|
|
; MAINVF4IC1_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
|
|
; MAINVF4IC1_EPI4: [[ENTRY:.*:]]
|
|
; MAINVF4IC1_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_SCEVCHECK]]:
|
|
; MAINVF4IC1_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
|
|
; MAINVF4IC1_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[VECTOR_BODY:.*]]
|
|
; MAINVF4IC1_EPI4: [[VECTOR_BODY]]:
|
|
; MAINVF4IC1_EPI4: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
|
|
; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
|
|
; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
|
|
; MAINVF4IC1_EPI4: br label %[[LOOP:.*]]
|
|
; MAINVF4IC1_EPI4: [[LOOP]]:
|
|
; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
|
|
; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]:
|
|
; MAINVF4IC1_EPI4: br label %[[EXIT]]
|
|
; MAINVF4IC1_EPI4: [[EXIT]]:
|
|
;
|
|
; MAINVF4IC2_EPI4-LABEL: define void @f0(
|
|
; MAINVF4IC2_EPI4-SAME: i8 [[N:%.*]], i32 [[LEN:%.*]], ptr [[P:%.*]]) !prof [[PROF0:![0-9]+]] {
|
|
; MAINVF4IC2_EPI4: [[ENTRY:.*:]]
|
|
; MAINVF4IC2_EPI4: [[CMP_ENTRY:%.*]] = icmp sgt i32 [[LEN]], 0
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_ENTRY]], label %[[ITER_CHECK:.*]], label %[[EXIT:.*]], !prof [[PROF1:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0:%.*]], 4
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]], !prof [[PROF2:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_SCEVCHECK]]:
|
|
; MAINVF4IC2_EPI4: [[TMP2:%.*]] = icmp slt i8 [[TMP1:%.*]], 0
|
|
; MAINVF4IC2_EPI4: [[TMP3:%.*]] = icmp ugt i32 [[LEN]], 255
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP4:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 8
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]], !prof [[PROF2]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[VECTOR_BODY:.*]]
|
|
; MAINVF4IC2_EPI4: [[VECTOR_BODY]]:
|
|
; MAINVF4IC2_EPI4: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]]
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]:
|
|
; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4
|
|
; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]:
|
|
; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]]
|
|
; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]:
|
|
; MAINVF4IC2_EPI4: br label %[[LOOP:.*]]
|
|
; MAINVF4IC2_EPI4: [[LOOP]]:
|
|
; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]]
|
|
; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]]
|
|
; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]:
|
|
; MAINVF4IC2_EPI4: br label %[[EXIT]]
|
|
; MAINVF4IC2_EPI4: [[EXIT]]:
|
|
;
|
|
entry:
|
|
; Guard: enter the loop only when %len is strictly positive (signed compare);
; otherwise branch straight to %exit.
%cmp.entry = icmp sgt i32 %len, 0
|
|
br i1 %cmp.entry, label %loop, label %exit, !prof !1
|
|
|
|
loop:
|
|
; Two counters start at 0 and are each incremented by 1 per iteration: a
; narrow i8 one used for addressing and an i32 one that is stored and drives
; the exit test.
%i8 = phi i8 [0, %entry], [%i8.inc, %loop]
|
|
%i32 = phi i32 [0, %entry], [%i32.inc, %loop]
|
|
|
|
; The address is indexed by the i8 counter, not the i32 one.
; TODO(review): the narrow index is presumably what makes the vectorizer emit
; the vector.scevcheck block (the `len > 255` test) expected above - confirm.
%ptr = getelementptr inbounds i32, ptr %p, i8 %i8
|
|
store i32 %i32, ptr %ptr
|
|
|
|
%i8.inc = add i8 %i8, 1
|
|
%i32.inc = add i32 %i32, 1
|
|
|
|
; The exit test compares the pre-increment value of %i32 (unsigned) with
; %len, so the backedge is taken while %i32 < %len.
%cmp.loop = icmp ult i32 %i32, %len
|
|
br i1 %cmp.loop, label %loop, label %exit, !prof !2
|
|
|
|
exit:
|
|
ret void
|
|
}
|
|
|
|
; Profile metadata used by @f0: !0 is attached to the function definition,
; !1 to the entry-guard branch (loop vs. exit) and !2 to the loop latch
; branch (backedge vs. exit).
!0 = !{!"function_entry_count", i64 13}
|
|
!1 = !{!"branch_weights", i32 12, i32 1}
|
|
!2 = !{!"branch_weights", i32 1234, i32 1}
|
|
;.
|
|
; MAINVF4IC1_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
|
|
; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
|
|
; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
|
|
; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
|
|
; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
|
|
; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; MAINVF4IC1_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154}
|
|
; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7}
|
|
; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4}
|
|
; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 0}
|
|
; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]}
|
|
; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0}
|
|
; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3}
|
|
; MAINVF4IC1_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1}
|
|
; MAINVF4IC1_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]}
|
|
; MAINVF4IC1_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3}
|
|
;.
|
|
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13}
|
|
; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1}
|
|
; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127}
|
|
; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153}
|
|
; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]}
|
|
; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
; MAINVF4IC2_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154}
|
|
; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7}
|
|
; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4}
|
|
; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 0}
|
|
; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]}
|
|
; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0}
|
|
; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3}
|
|
; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1}
|
|
; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]}
|
|
; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3}
|
|
;.
|