This precommits a full reproducer of one of our motivating examples. Looking at a full reproducer is helpful for further discussion of the DependenceAnalysis and Delinearization issues and of the runtime predicates. I appreciate that this is a larger-than-usual test case, but that is by design: I think it is useful to look at the whole thing with all of its complexities. I have given meaningful names to all the relevant loop variables, and to the relevant blocks in these loops and their functions, but have intentionally not done so for the remaining values, as there are quite a few of them.
156 lines
5.8 KiB
LLVM
156 lines
5.8 KiB
LLVM
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S
; RUN: FileCheck --input-file=%t %s

; Layout string: little-endian ("e"), ELF name mangling ("m:e"), native
; integer widths of 32 and 64 bits ("n32:64"), 128-bit stack alignment ("S128").
; NOTE(review): this looks like an AArch64 layout, but the file sets no target
; triple, so confirm before relying on that.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

; This is a reduced test case for the example in "large-nested-6d.ll". For a
; full description of the purpose of this test and its complexities, see that
; file.
;
; This reproducer contains the perfectly nested sub part of that bigger loop
; nest:
;
;   for i=1 to NX
;     for j=1 to NY
;       for IL=1 to NX
;         load GlobC(i,IL,L)
;         load GlobG(i,IL,L)
;         load GlobE(i,IL,L)
;         load GlobI(i,IL,L)
;         for JL=1 to NY
;           load GlobD(j,JL,M)
;           load GlobH(j,JL,M)
;           load GlobF(j,JL,M)
;           load GlobJ(j,JL,M)
;           store GlobL(NY*i+j,NY*IL+JL)
;         End
;       End
;     End
;   End
;
; This reproducer is useful to focus only on the 2nd challenge: the data
; dependence analysis problem, and not worry about the rest of the loop nest
; structure.
;
; TODO:
;
; If loop-interchange is able to deal with imperfectly nested loops, this
; test is redundant and we only need to keep "large-nested-6d.ll".
;
; CHECK: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: All loops have dependencies in all directions.
; CHECK-NEXT: ...

; Arrays accessed by @test. The eight inputs are 54 x 54 x 54 arrays of double.
; @GlobC, @GlobG, @GlobE and @GlobI are loaded in the IL loop header;
; @GlobD, @GlobH, @GlobF and @GlobJ are loaded in the JL loop body.
@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer

; Output array: 1000 x 1000 doubles, so one row is 8000 bytes. It is the only
; memory @test stores to.
@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer

; Four-deep, perfectly nested loop nest (i, j, IL, JL) used as the input for
; loop-interchange.
;
; Arguments: only %7 and %8 are read. Each is loaded as an i32 and
; sign-extended to i64:
;   %18 (from %7) bounds the i and IL loops;
;   %21 (from %8) bounds the j and JL loops and scales the @GlobL indices.
; The remaining pointer arguments are not referenced in this body.
;
; The nest is entered only when both loaded bounds are > 0. Every loop is
; bottom-tested: its counter starts at 1 and the exit test compares the
; pre-increment counter against the bound with 'icmp eq'.
;
; Each innermost iteration computes
;   GlobD*GlobC + GlobH*GlobG + GlobF*GlobE + GlobJ*GlobI
; and stores it into @GlobL at byte offset
;   8*((i-1)*%21 + j) + 8000*((IL-1)*%21 + JL) - 8008.
define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) {
entry:
  ; Load both trip-count bounds and skip the whole nest unless both are > 0.
  %17 = load i32, ptr %7, align 4
  %18 = sext i32 %17 to i64
  %20 = load i32, ptr %8, align 4
  %21 = sext i32 %20 to i64
  %cmp1 = icmp sgt i32 %17, 0
  %cmp2 = icmp sgt i32 %20, 0
  %cond = and i1 %cmp1, %cmp2
  br i1 %cond, label %preheader, label %exit

preheader:
  br label %i.header

i.header:
  %i = phi i64 [ %i.next, %i.latch ], [ 1, %preheader ]
  ; %92 = i - 55; combined with 54*IL below it gives (i-1) + 54*(IL-1).
  %92 = add nsw i64 -55, %i
  ; i-dependent part of the @GlobL address: (i-1)*%21 doubles.
  %93 = add nsw i64 %i, -1
  %94 = mul nsw i64 %93, %21
  %invariant.gep = getelementptr double, ptr @GlobL, i64 %94
  br label %j.header

j.header:
  %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ]
  ; %95 = j - 55; combined with 54*JL below it gives (j-1) + 54*(JL-1).
  %95 = add nsw i64 -55, %j
  ; Add j doubles to the @GlobL address.
  %gep358 = getelementptr double, ptr %invariant.gep, i64 %j
  br label %IL.header

IL.header:
  %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ]
  ; %97 = (i-1) + 54*(IL-1): element index shared by the four loads below.
  %96 = mul nuw nsw i64 %IL, 54
  %97 = add nsw i64 %92, %96
  %98 = getelementptr double, ptr @GlobC, i64 %97
  %99 = load double, ptr %98, align 8
  %100 = getelementptr double, ptr @GlobG, i64 %97
  %101 = load double, ptr %100, align 8
  %102 = getelementptr double, ptr @GlobE, i64 %97
  %103 = load double, ptr %102, align 8
  %104 = getelementptr double, ptr @GlobI, i64 %97
  %105 = load double, ptr %104, align 8
  ; IL-dependent part of the @GlobL row index: (IL-1)*%21.
  %106 = add nsw i64 %IL, -1
  %107 = mul nsw i64 %106, %21
  br label %JL.body

JL.body:
  %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ]
  ; %110 = (j-1) + 54*(JL-1): element index shared by the four loads below.
  %109 = mul nuw nsw i64 %JL, 54
  %110 = add nsw i64 %95, %109
  %111 = getelementptr double, ptr @GlobD, i64 %110
  %112 = load double, ptr %111, align 8
  %113 = fmul fast double %112, %99
  %114 = getelementptr double, ptr @GlobH, i64 %110
  %115 = load double, ptr %114, align 8
  %116 = fmul fast double %115, %101
  %117 = fadd fast double %116, %113
  %118 = getelementptr double, ptr @GlobF, i64 %110
  %119 = load double, ptr %118, align 8
  %120 = fmul fast double %119, %103
  %121 = fadd fast double %117, %120
  %122 = getelementptr double, ptr @GlobJ, i64 %110
  %123 = load double, ptr %122, align 8
  %124 = fmul fast double %123, %105
  %125 = fadd fast double %121, %124
  ; Row index (IL-1)*%21 + JL, scaled by 8000 bytes (one row of 1000 doubles).
  %126 = add nsw i64 %JL, %107
  %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000
  %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us
  ; -8008 = -(8000 + 8): one row and one element back, which converts both
  ; 1-based indices to 0-based.
  %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008
  store double %125, ptr %127, align 8
  %JL.next = add nuw nsw i64 %JL, 1
  %exitcond.not = icmp eq i64 %JL, %21
  br i1 %exitcond.not, label %IL.latch, label %JL.body

IL.latch:
  %IL.next = add nuw nsw i64 %IL, 1
  %exitcond320.not = icmp eq i64 %IL, %18
  br i1 %exitcond320.not, label %j.latch, label %IL.header

j.latch:
  %j.next = add nuw nsw i64 %j, 1
  %exitcond324.not = icmp eq i64 %j, %21
  br i1 %exitcond324.not, label %i.latch, label %j.header

i.latch:
  %i.next = add nuw nsw i64 %i, 1
  %exitcond328.not = icmp eq i64 %i, %18
  br i1 %exitcond328.not, label %exit, label %i.header

exit:
  ret void
}