Files
llvm-project/llvm/test/Transforms/LoopVectorize/explicit_outer_detection.ll
David Sherwood 4cd2db44f7 [LV][NFC] Remove llvm.ident, tbaa and other attributes from tests (#191375)
While in this area I also removed unnecessary annotations for wchar_size
and also cleaned up some more function attributes.
2026-04-10 13:46:27 +01:00

228 lines
7.1 KiB
LLVM

; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize --disable-output -S 2>&1 | FileCheck %s
; REQUIRES: asserts
; Verify that outer loops annotated only with the expected explicit
; vectorization hints are collected for vectorization instead of inner loops.
; Root C/C++ source code for all the test cases
; void foo(int *a, int *b, int N, int M)
; {
; int i, j;
; #pragma clang loop vectorize(enable)
; for (i = 0; i < N; i++) {
; for (j = 0; j < M; j++) {
; a[i*M+j] = bptr b[i*M+j];
; }
; }
; }
; Case 1: Annotated outer loop WITH vector width information must be collected.
; CHECK-LABEL: vector_width
; CHECK: LV: Loop hints: force=enabled width=4 interleave=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using user VF 4 to build VPlans.
; CHECK-NOT: LV: Loop hints: force=?
; CHECK-NOT: LV: Found a loop: inner.body
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @vector_width(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) {
entry:
%cmp32 = icmp sgt i32 %N, 0
br i1 %cmp32, label %outer.ph, label %for.end15
outer.ph:
%cmp230 = icmp sgt i32 %M, 0
%0 = sext i32 %M to i64
%wide.trip.count = zext i32 %M to i64
%wide.trip.count38 = zext i32 %N to i64
br label %outer.body
outer.body:
%indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
br i1 %cmp230, label %inner.ph, label %outer.inc
inner.ph:
%1 = mul nsw i64 %indvars.iv35, %0
br label %inner.body
inner.body:
%indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
%2 = add nsw i64 %indvars.iv, %1
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
%3 = load i32, ptr %arrayidx, align 4
%mul8 = mul nsw i32 %3, %3
%arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
store i32 %mul8, ptr %arrayidx12, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %outer.inc, label %inner.body
outer.inc:
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
%exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !6
for.end15:
ret void
}
; Case 2: Annotated outer loop WITHOUT vector width information must be collected.
; CHECK-LABEL: case2
; CHECK: LV: Loop hints: force=enabled width=0 interleave=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using VF 1 to build VPlans.
define void @case2(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) {
entry:
%cmp32 = icmp sgt i32 %N, 0
br i1 %cmp32, label %outer.ph, label %for.end15
outer.ph:
%cmp230 = icmp sgt i32 %M, 0
%0 = sext i32 %M to i64
%wide.trip.count = zext i32 %M to i64
%wide.trip.count38 = zext i32 %N to i64
br label %outer.body
outer.body:
%indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
br i1 %cmp230, label %inner.ph, label %outer.inc
inner.ph:
%1 = mul nsw i64 %indvars.iv35, %0
br label %inner.body
inner.body:
%indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
%2 = add nsw i64 %indvars.iv, %1
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
%3 = load i32, ptr %arrayidx, align 4
%mul8 = mul nsw i32 %3, %3
%arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
store i32 %mul8, ptr %arrayidx12, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %outer.inc, label %inner.body
outer.inc:
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
%exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !9
for.end15:
ret void
}
; Case 3: Annotated outer loop WITH vector width and interleave information
; doesn't have to be collected.
; CHECK-LABEL: case3
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body
define void @case3(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) {
entry:
%cmp32 = icmp sgt i32 %N, 0
br i1 %cmp32, label %outer.ph, label %for.end15
outer.ph:
%cmp230 = icmp sgt i32 %M, 0
%0 = sext i32 %M to i64
%wide.trip.count = zext i32 %M to i64
%wide.trip.count38 = zext i32 %N to i64
br label %outer.body
outer.body:
%indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
br i1 %cmp230, label %inner.ph, label %outer.inc
inner.ph:
%1 = mul nsw i64 %indvars.iv35, %0
br label %inner.body
inner.body:
%indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
%2 = add nsw i64 %indvars.iv, %1
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
%3 = load i32, ptr %arrayidx, align 4
%mul8 = mul nsw i32 %3, %3
%arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
store i32 %mul8, ptr %arrayidx12, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %outer.inc, label %inner.body
outer.inc:
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
%exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !11
for.end15:
ret void
}
; Case 4: Outer loop without any explicit vectorization annotation doesn't have
; to be collected.
; CHECK-LABEL: case4
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body
define void @case4(ptr nocapture %a, ptr nocapture readonly %b, i32 %N, i32 %M) {
entry:
%cmp32 = icmp sgt i32 %N, 0
br i1 %cmp32, label %outer.ph, label %for.end15
outer.ph:
%cmp230 = icmp sgt i32 %M, 0
%0 = sext i32 %M to i64
%wide.trip.count = zext i32 %M to i64
%wide.trip.count38 = zext i32 %N to i64
br label %outer.body
outer.body:
%indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
br i1 %cmp230, label %inner.ph, label %outer.inc
inner.ph:
%1 = mul nsw i64 %indvars.iv35, %0
br label %inner.body
inner.body:
%indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
%2 = add nsw i64 %indvars.iv, %1
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %2
%3 = load i32, ptr %arrayidx, align 4
%mul8 = mul nsw i32 %3, %3
%arrayidx12 = getelementptr inbounds i32, ptr %a, i64 %2
store i32 %mul8, ptr %arrayidx12, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %outer.inc, label %inner.body
outer.inc:
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
%exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
br i1 %exitcond39, label %for.end15, label %outer.body
for.end15:
ret void
}
; Case 1
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 4}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}
; Case 2
!9 = distinct !{!9, !8}
; Case 3
!10 = !{!"llvm.loop.interleave.count", i32 2}
!11 = distinct !{!11, !7, !10, !8}