Files
Sjoerd Meijer b492b3523c [LoopInterchange] Motivating example for interchange. NFC. (#171631)
This is precommitting a full reproducer of one of our motivating
examples. Looking at a full reproducer is helpful for further discussion
on DependenceAnalysis and Delinearization issues and the runtime
predicates discussion. I appreciate that this is a larger than usual
test case, but that is by design, because I think it is useful to look
at the whole thing with all of its complexities.

I have given useful names to all the relevant loop variables, and the
relevant blocks in these loops and their functions, but have
intentionally not done that for others as there are quite a few more.
2025-12-12 08:40:18 +00:00

156 lines
5.8 KiB
LLVM

; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S
; RUN: FileCheck --input-file=%t %s
; Data layout used for this test: little-endian ('e'), ELF name mangling
; ('m:e'), native integer widths of 32 and 64 bits ('n32:64') and a 128-bit
; natural stack alignment ('S128').
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
; This is a reduced test case for the example in "large-nested-6d.ll". For a
; full description of the purpose of this test and its complexities, see that file.
;
; This reproducer contains the perfectly nested sub part of that bigger loop
; nest:
;
;   for i=1 to NX
;     for j=1 to NY
;       for IL=1 to NX
;         load GlobC(i,IL,L)
;         load GlobG(i,IL,L)
;         load GlobE(i,IL,L)
;         load GlobI(i,IL,L)
;         for JL=1 to NY
;           load GlobD(j,JL,M)
;           load GlobH(j,JL,M)
;           load GlobF(j,JL,M)
;           load GlobJ(j,JL,M)
;           store GlobL(NY*i+j,NY*IL+JL)
;         End
;       End
;     End
;   End
;
; This reproducer is useful to focus only on the 2nd challenge: the data
; dependence analysis problem, and not worry about the rest of the loop nest
; structure.
;
; TODO:
;
; If loop-interchange is able to deal with imperfectly nested loops, this
; test is redundant and we only need to keep "large-nested-6d.ll".
;
; CHECK: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: All loops have dependencies in all directions.
; CHECK-NEXT: ...
; Arrays accessed by @test. The eight 54x54x54 double arrays (@GlobC..@GlobJ)
; are only loaded from in @test; the 1000x1000 double array @GlobL is the only
; one @test stores into.
@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer
; Four-deep loop nest (i, j, IL, JL). The innermost loop loads four doubles
; indexed by (j, JL), multiplies each with a double loaded in the IL loop
; (indexed by (i, IL)), sums the four products and stores the sum into @GlobL.
; Only the pointer arguments %7 and %8 are read in this body; the remaining
; arguments are not referenced.
define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) {
entry:
; %18: sign-extended i32 loaded from %7, the exit bound of the i and IL loops
; (presumably NX in the pseudo-code at the top of this file).
%17 = load i32, ptr %7, align 4
%18 = sext i32 %17 to i64
; %21: sign-extended i32 loaded from %8, the exit bound of the j and JL loops
; (presumably NY in the pseudo-code at the top of this file).
%20 = load i32, ptr %8, align 4
%21 = sext i32 %20 to i64
; Guard: the loop nest is entered only when both bounds are strictly positive.
%cmp1 = icmp sgt i32 %17, 0
%cmp2 = icmp sgt i32 %20, 0
%cond = and i1 %cmp1, %cmp2
br i1 %cond, label %preheader, label %exit
preheader:
br label %i.header
; Outermost loop: %i starts at 1 and the loop exits when %i == %18.
i.header:
%i = phi i64 [ %i.next, %i.latch ], [ 1, %preheader ]
; %92 = i - 55; combined with 54 * IL below this gives (i - 1) + 54 * (IL - 1).
%92 = add nsw i64 -55, %i
; %invariant.gep = @GlobL advanced by (i - 1) * %21 doubles.
%93 = add nsw i64 %i, -1
%94 = mul nsw i64 %93, %21
%invariant.gep = getelementptr double, ptr @GlobL, i64 %94
br label %j.header
; Second loop: %j starts at 1 and the loop exits when %j == %21.
j.header:
%j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ]
; %95 = j - 55; combined with 54 * JL below this gives (j - 1) + 54 * (JL - 1).
%95 = add nsw i64 -55, %j
; %gep358 = @GlobL advanced by ((i - 1) * %21 + j) doubles.
%gep358 = getelementptr double, ptr %invariant.gep, i64 %j
br label %IL.header
; Third loop: %IL starts at 1 and the loop exits when %IL == %18.
IL.header:
%IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ]
; %97 = i - 55 + 54 * IL: flat double index shared by the four loads below.
%96 = mul nuw nsw i64 %IL, 54
%97 = add nsw i64 %92, %96
; These four loads depend only on i and IL, not on j or JL.
%98 = getelementptr double, ptr @GlobC, i64 %97
%99 = load double, ptr %98, align 8
%100 = getelementptr double, ptr @GlobG, i64 %97
%101 = load double, ptr %100, align 8
%102 = getelementptr double, ptr @GlobE, i64 %97
%103 = load double, ptr %102, align 8
%104 = getelementptr double, ptr @GlobI, i64 %97
%105 = load double, ptr %104, align 8
; %107 = (IL - 1) * %21, used for the row part of the store address.
%106 = add nsw i64 %IL, -1
%107 = mul nsw i64 %106, %21
br label %JL.body
; Innermost loop: %JL starts at 1 and the loop exits when %JL == %21.
JL.body:
%JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ]
; %110 = j - 55 + 54 * JL: flat double index shared by the four loads below.
%109 = mul nuw nsw i64 %JL, 54
%110 = add nsw i64 %95, %109
; %125 = GlobD*GlobC + GlobH*GlobG + GlobF*GlobE + GlobJ*GlobI, where the
; D/H/F/J values are loaded here and the C/G/E/I values come from IL.header.
%111 = getelementptr double, ptr @GlobD, i64 %110
%112 = load double, ptr %111, align 8
%113 = fmul fast double %112, %99
%114 = getelementptr double, ptr @GlobH, i64 %110
%115 = load double, ptr %114, align 8
%116 = fmul fast double %115, %101
%117 = fadd fast double %116, %113
%118 = getelementptr double, ptr @GlobF, i64 %110
%119 = load double, ptr %118, align 8
%120 = fmul fast double %119, %103
%121 = fadd fast double %117, %120
%122 = getelementptr double, ptr @GlobJ, i64 %110
%123 = load double, ptr %122, align 8
%124 = fmul fast double %123, %105
%125 = fadd fast double %121, %124
; Store address in bytes: %gep358 + 8000 * ((IL - 1) * %21 + JL) - 8008.
; 8000 bytes is one 1000-element row of doubles of @GlobL, and
; -8008 = -(8000 + 8) steps back one row and one element, so the destination is
; @GlobL + 8000 * ((IL - 1) * %21 + JL - 1) + 8 * ((i - 1) * %21 + j - 1).
%126 = add nsw i64 %JL, %107
%.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000
%gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us
%127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008
store double %125, ptr %127, align 8
; The exit test compares the pre-increment value, so JL runs 1..%21 inclusive.
%JL.next = add nuw nsw i64 %JL, 1
%exitcond.not = icmp eq i64 %JL, %21
br i1 %exitcond.not, label %IL.latch, label %JL.body
; Same pattern for the outer latches: compare the pre-increment induction
; value against the bound, so each loop runs 1..bound inclusive.
IL.latch:
%IL.next = add nuw nsw i64 %IL, 1
%exitcond320.not = icmp eq i64 %IL, %18
br i1 %exitcond320.not, label %j.latch, label %IL.header
j.latch:
%j.next = add nuw nsw i64 %j, 1
%exitcond324.not = icmp eq i64 %j, %21
br i1 %exitcond324.not, label %i.latch, label %j.header
i.latch:
%i.next = add nuw nsw i64 %i, 1
%exitcond328.not = icmp eq i64 %i, %18
br i1 %exitcond328.not, label %exit, label %i.header
exit:
ret void
}