Rename the -prefer-predicate-over-epilogue flag and its associated enum values to use 'TailFold' terminology instead of 'Predicate'. The term 'Predicate' is overloaded in the vectorizer context and would cause further confusion as more tail-folding styles are added.
73 lines
3.9 KiB
LLVM
73 lines
3.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt < %s -passes=loop-vectorize -tail-predication=enabled -tail-folding-policy=must-fold-tail -enable-wide-lane-mask -S | FileCheck %s
|
|
|
|
target triple = "thumbv8.1m.main-arm-unknown-eabihf"
|
|
|
|
define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
|
|
; CHECK-LABEL: define void @f0(
|
|
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: [[ENTRY:.*:]]
|
|
; CHECK-NEXT: [[VAL:%.*]] = icmp sgt i64 [[N]], 0
|
|
; CHECK-NEXT: br i1 [[VAL]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
|
|
; CHECK: [[FOR_BODY_PREHEADER]]:
|
|
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
|
|
; CHECK: [[VECTOR_PH]]:
|
|
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 31
|
|
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 32
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
|
|
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
|
; CHECK: [[VECTOR_BODY]]:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 16
|
|
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[INDEX]], i64 [[N]])
|
|
; CHECK-NEXT: [[ACTIVE_LANE_MASK1:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[TMP0]], i64 [[N]])
|
|
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
|
|
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr align 1 [[TMP1]], <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> poison)
|
|
; CHECK-NEXT: [[WIDE_MASKED_LOAD2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr align 1 [[TMP3]], <16 x i1> [[ACTIVE_LANE_MASK1]], <16 x i8> poison)
|
|
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 3)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i8> [[WIDE_MASKED_LOAD2]], splat (i8 3)
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
|
|
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16
|
|
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP4]], ptr align 1 [[TMP6]], <16 x i1> [[ACTIVE_LANE_MASK]])
|
|
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP5]], ptr align 1 [[TMP8]], <16 x i1> [[ACTIVE_LANE_MASK1]])
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
|
|
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
|
|
; CHECK: [[MIDDLE_BLOCK]]:
|
|
; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT:.*]]
|
|
; CHECK: [[FOR_END_LOOPEXIT]]:
|
|
; CHECK-NEXT: br label %[[FOR_END]]
|
|
; CHECK: [[FOR_END]]:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%val = icmp sgt i64 %n, 0
|
|
br i1 %val, label %for.body, label %for.end
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%arrayidx = getelementptr inbounds i8, ptr %src, i64 %indvars.iv
|
|
%0 = load i8, ptr %arrayidx, align 1
|
|
%mul = mul i8 %0, 3
|
|
%arrayidx3 = getelementptr inbounds i8, ptr %dst, i64 %indvars.iv
|
|
store i8 %mul, ptr %arrayidx3, align 1
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond.not = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nofree "target-features"="+armv8.1-m.main,+mve.fp" }
|
|
|
|
!1 = distinct !{!1, !2, !3}
|
|
!2 = !{!"llvm.loop.vectorize.width", i32 16}
|
|
!3 = !{!"llvm.loop.interleave.count", i32 2}
|
|
;.
|
|
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
|
|
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
|
|
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
|
|
;.
|