Files
Ryotaro Kasuga 55a041baf3 [DA] Move no-wrap flag check into checkSubscript (#190770)
Recent patches added no-wrap flag checks to each dependence test (except
for the Banerjee MIV test) to make them sound. These fixes have been
applied one by one to ensure that each dependence test was correctly
updated and the defects were properly addressed. However, ideally, these
functions should not be called at all when the required no-wrap flags
are not set. Specifically, `classifyPair` should tag pairs as
`NonLinear` when either addrec doesn't have the no-wrap flag, which
means that the addrec is literally non-linear.
This patch moves the existing no-wrap flag checks in each dependence
test to `checkSubscript`, which is called by `classifyPair`. With this
change, if the addrec doesn't have the no-wrap flag, the pair will be
classified as `NonLinear` and the dependence test will not be invoked at
all. I believe this change makes the code cleaner and consistent with
the meaning of `NonLinear` classification.
Note that this patch doesn't take care of the behavioral change caused
by the Banerjee MIV test, as the test is still not sound and there are
no plans to fix it in the near future.
2026-04-08 02:38:57 +00:00

563 lines
20 KiB
LLVM

; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S
; RUN: FileCheck --input-file=%t %s
; Module data layout; the leading 'e' selects little-endian and 'n32:64'
; declares 32- and 64-bit native integer widths.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
; The IR test case below is a full and representative motivating example
; for loop-interchange containing a more complex loop nest structure that
; corresponds to this pseudo-code:
;
; for L=1 to NX
; for M=1 to NY
; for i=1 to NX
; for j=1 to NY
; for IL=1 to NX
; load GlobC(i,IL,L)
; load GlobG(i,IL,L)
; load GlobE(i,IL,L)
; load GlobI(i,IL,L)
; for JL=1 to NY
; load GlobD(j,JL,M)
; load GlobH(j,JL,M)
; load GlobF(j,JL,M)
; load GlobJ(j,JL,M)
; store GlobL(NY*i+j,NY*IL+JL)
; End
; End
; End
; End
; // Stmt 2
; // Stmt 3
; // Stmt 4
; End
; End
;
; It is important to note here that this comes from Fortran code, which uses a
; column-major data layout, so loops 'j' and 'JL' should be interchanged. I.e.
; in the IR below, basic block JL.body is part of the loop that we would
; like to see interchanged as there are 4 loads and 1 store that are
; unit-strided over 'j', so making 'j' loop the innermost is preferable here.
;
; TODO:
;
; There are a few issues that prevent loop-interchange from performing its
; transformation on this test case:
;
; 1. LoopNest checks: the first check that is performed is whether loop 'L.header'
; and 'M.header' are perfectly nested, which they are not. It needs to be
; investigated why the whole loop nest rooted under L is rejected as a
; candidate.
;
; 2. DependenceAnalysis: it finds this dependency:
;
; Found output dependency between Src and Dst
; Src: store double %46, ptr %48, align 8
; Dst: store double %46, ptr %48, align 8
;
;
; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: UnsupportedLoopNestDepth
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: 'Unsupported depth of loop nest, the supported range is ['
; CHECK-NEXT: - String: '2'
; CHECK-NEXT: - String: ', '
; CHECK-NEXT: - String: '10'
; CHECK-NEXT: - String: "].\n"
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Cannot interchange loops due to dependences.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: UnsupportedLoopNestDepth
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: 'Unsupported depth of loop nest, the supported range is ['
; CHECK-NEXT: - String: '2'
; CHECK-NEXT: - String: ', '
; CHECK-NEXT: - String: '10'
; CHECK-NEXT: - String: "].\n"
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: All loops have dependencies in all directions.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: test
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: All loops have dependencies in all directions.
; CHECK-NEXT: ...
; Zero-initialized global arrays accessed by @test.
; GlobC..GlobJ are 54x54x54 arrays of double read by the IL/JL loops,
; GlobK and GlobL are 1000x1000 arrays of double, and GlobM holds 2500 doubles.
@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobK = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer
@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer
@GlobM = local_unnamed_addr global [2500 x double] zeroinitializer
; @test holds the six-deep loop nest L > M > i > j > IL > JL sketched in the
; pseudo-code at the top of this file. The M loop additionally contains a
; chain of further loops that run after the i loop (exit.i .. latch.M), so
; the L/M pair is not perfectly nested.
; Arguments %4-%9 are each loaded as an i32; the loaded values feed loop
; bounds, pointer offsets and the entry branch. Arguments %0-%3 are used as
; base pointers for double accesses.
define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) {
%11 = alloca [2500 x double], align 8
%12 = load i32, ptr %4, align 4
%13 = tail call i32 @llvm.smax.i32(i32 %12, i32 0)
%14 = zext nneg i32 %13 to i64
%15 = load i32, ptr %9, align 4
; Entry dispatch: the value loaded from %9 selects block 171 (== 1) or the
; path through block 16 that contains the L loop nest.
%.not = icmp eq i32 %15, 1
br i1 %.not, label %171, label %16
16:
; %18 (sign-extended load from %7) bounds the L, i and IL loops.
%17 = load i32, ptr %7, align 4
%18 = sext i32 %17 to i64
%19 = icmp sgt i32 %17, 0
br i1 %19, label %.lr.ph286, label %._crit_edge287
.lr.ph286:
; %21 (sign-extended load from %8) bounds the M, j and JL loops.
%20 = load i32, ptr %8, align 4
%21 = sext i32 %20 to i64
%22 = icmp sgt i32 %20, 0
br i1 %22, label %preheader.L, label %._crit_edge287
preheader.L:
; Values invariant across the whole L loop nest.
%23 = load i32, ptr %5, align 4
%24 = tail call i32 @llvm.smax.i32(i32 %23, i32 0)
%25 = zext nneg i32 %24 to i64
%26 = load i32, ptr %6, align 4
%27 = sext i32 %26 to i64
%28 = getelementptr double, ptr %1, i64 %27
%.not241270.us = icmp slt i32 %23, 1
%29 = shl nuw nsw i64 %25, 3
%30 = add nuw nsw i64 %25, 2
%31 = icmp sgt i32 %23, 0
%.neg = sext i1 %31 to i64
%32 = add nsw i64 %30, %.neg
br label %L.header
; Outermost loop: %L starts at 1 and L.latch exits when %L equals %18.
L.header:
%L = phi i64 [ %L.next, %L.latch ], [ 1, %preheader.L ]
; %34 = 2916*L - 2971 is the L-dependent part of the index used in IL.header.
%33 = mul nuw nsw i64 %L, 2916
%34 = add nsw i64 %33, -2971
%35 = add nsw i64 %L, -1
%36 = mul nsw i64 %35, %21
br label %M.header
; Successor of the i loop; every path from here rejoins at latch.M.
exit.i:
br i1 %.not241270.us, label %._crit_edge275.us.thread, label %.preheader258.us.preheader
; Single-block loop counting %37 down from %25 to 0 while accumulating the
; sum of products of elements loaded via %28 and @GlobM into %46.
.lr.ph274.us:
%37 = phi i64 [ %48, %.lr.ph274.us ], [ %25, %.preheader260.us ]
%38 = phi double [ %46, %.lr.ph274.us ], [ 0.000000e+00, %.preheader260.us ]
%39 = phi i64 [ %47, %.lr.ph274.us ], [ 1, %.preheader260.us ]
%40 = add nsw i64 %39, -1
%41 = getelementptr double, ptr %28, i64 %40
%42 = load double, ptr %41, align 8
%43 = getelementptr double, ptr @GlobM, i64 %40
%44 = load double, ptr %43, align 8
%45 = fmul fast double %44, %42
%46 = fadd fast double %45, %38
%47 = add nuw nsw i64 %39, 1
%48 = add nsw i64 %37, -1
%.not242.us = icmp eq i64 %48, 0
br i1 %.not242.us, label %.lr.ph278.us.preheader, label %.lr.ph274.us
.lr.ph278.us.preheader:
; Stores the accumulated sum into the local buffer %11 at element %49 - 1,
; where %49 = %M + (%L - 1) * %21.
%.lcssa = phi double [ %46, %.lr.ph274.us ]
%49 = add nsw i64 %M, %36
%50 = getelementptr double, ptr %11, i64 %49
%51 = getelementptr i8, ptr %50, i64 -8
store double %.lcssa, ptr %51, align 8
%52 = getelementptr double, ptr @GlobK, i64 %49
%53 = getelementptr i8, ptr %52, i64 -8
br label %.lr.ph278.us
latch.M.loopexit:
br label %latch.M
latch.M:
%M.next = add nuw nsw i64 %M, 1
%exitcond335.not = icmp eq i64 %M, %21
br i1 %exitcond335.not, label %L.latch, label %M.header
; Outer loop over %54 (from 1, exits when %133 equals %32); each iteration
; runs the inner reduction in block 57 over one 8000-byte stride of @GlobL.
.lr.ph278.us:
%54 = phi i64 [ %133, %._crit_edge279.us ], [ 1, %.lr.ph278.us.preheader ]
%55 = add nsw i64 %54, -1
%.idx244.us = mul nuw nsw i64 %55, 8000
%56 = getelementptr i8, ptr @GlobL, i64 %.idx244.us
br label %57
57:
%58 = phi i64 [ %25, %.lr.ph278.us ], [ %69, %57 ]
%59 = phi double [ 0.000000e+00, %.lr.ph278.us ], [ %67, %57 ]
%60 = phi i64 [ 1, %.lr.ph278.us ], [ %68, %57 ]
%61 = add nsw i64 %60, -1
%62 = getelementptr double, ptr %56, i64 %61
%63 = load double, ptr %62, align 8
%64 = getelementptr double, ptr %28, i64 %61
%65 = load double, ptr %64, align 8
%66 = fmul fast double %65, %63
%67 = fadd fast double %66, %59
%68 = add nuw nsw i64 %60, 1
%69 = add nsw i64 %58, -1
%.not243.us = icmp eq i64 %69, 0
br i1 %.not243.us, label %._crit_edge279.us, label %57
; Inner loop of the .preheader258.us nest: updates @GlobM elements in place
; with %86 times an element loaded via %84.
70:
%71 = phi i64 [ %25, %.preheader258.us ], [ %81, %70 ]
%72 = phi i64 [ 1, %.preheader258.us ], [ %80, %70 ]
%73 = add nsw i64 %72, -1
%74 = getelementptr double, ptr @GlobM, i64 %73
%75 = load double, ptr %74, align 8
%76 = getelementptr double, ptr %84, i64 %73
%77 = load double, ptr %76, align 8
%78 = fmul fast double %86, %77
%79 = fadd fast double %78, %75
store double %79, ptr %74, align 8
%80 = add nuw nsw i64 %72, 1
%81 = add nsw i64 %71, -1
%.not245.us = icmp eq i64 %81, 0
br i1 %.not245.us, label %._crit_edge.us, label %70
.preheader258.us:
%82 = phi i64 [ %128, %._crit_edge.us ], [ 1, %.preheader258.us.preheader ]
%83 = add nsw i64 %82, -1
%.idx246.us = mul nuw nsw i64 %83, 8000
%84 = getelementptr i8, ptr @GlobL, i64 %.idx246.us
%85 = getelementptr double, ptr %28, i64 %83
%86 = load double, ptr %85, align 8
br label %70
.preheader260.us:
br label %.lr.ph274.us
; Taken from exit.i when %23 < 1: stores 0.0 into %11 and skips the
; remaining loops of this M iteration.
._crit_edge275.us.thread:
%87 = getelementptr double, ptr %11, i64 %M
%88 = getelementptr double, ptr %87, i64 %36
%89 = getelementptr i8, ptr %88, i64 -8
store double 0.000000e+00, ptr %89, align 8
br label %latch.M
.preheader258.us.preheader:
; Clears the first %29 = 8 * %25 bytes of @GlobM before the update loops.
call void @llvm.memset.p0.i64(ptr nonnull align 16 @GlobM, i8 0, i64 %29, i1 false)
br label %.preheader258.us
; Second-level loop: %M starts at 1 and latch.M exits when %M equals %21.
M.header:
%M = phi i64 [ 1, %L.header ], [ %M.next, %latch.M ]
; %91 = 2916*M - 2971 is the M-dependent part of the index used in JL.body.
%90 = mul nuw nsw i64 %M, 2916
%91 = add nsw i64 %90, -2971
br label %i.header
; %i starts at 1 and i.latch exits when %i equals %18.
i.header:
%i = phi i64 [ %i.next, %i.latch ], [ 1, %M.header ]
%92 = add nsw i64 %34, %i
%93 = add nsw i64 %i, -1
%94 = mul nsw i64 %93, %21
%invariant.gep = getelementptr double, ptr @GlobL, i64 %94
br label %j.header
; %j starts at 1 and j.latch exits when %j equals %21.
j.header:
%j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ]
%95 = add nsw i64 %91, %j
%gep358 = getelementptr double, ptr %invariant.gep, i64 %j
br label %IL.header
; %IL starts at 1 and IL.latch exits when %IL equals %18. The four loads
; below share the element index %97 = 2916*L + 54*IL + i - 2971.
IL.header:
%IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ]
%96 = mul nuw nsw i64 %IL, 54
%97 = add nsw i64 %92, %96
%98 = getelementptr double, ptr @GlobC, i64 %97
%99 = load double, ptr %98, align 8
%100 = getelementptr double, ptr @GlobG, i64 %97
%101 = load double, ptr %100, align 8
%102 = getelementptr double, ptr @GlobE, i64 %97
%103 = load double, ptr %102, align 8
%104 = getelementptr double, ptr @GlobI, i64 %97
%105 = load double, ptr %104, align 8
%106 = add nsw i64 %IL, -1
%107 = mul nsw i64 %106, %21
br label %JL.body
; Innermost loop: %JL starts at 1 and exits when %JL equals %21. The four
; loads share the element index %110 = 2916*M + 54*JL + j - 2971; the
; combined value %125 is stored through a pointer derived from @GlobL.
JL.body:
%JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ]
%109 = mul nuw nsw i64 %JL, 54
%110 = add nsw i64 %95, %109
%111 = getelementptr double, ptr @GlobD, i64 %110
%112 = load double, ptr %111, align 8
%113 = fmul fast double %112, %99
%114 = getelementptr double, ptr @GlobH, i64 %110
%115 = load double, ptr %114, align 8
%116 = fmul fast double %115, %101
%117 = fadd fast double %116, %113
%118 = getelementptr double, ptr @GlobF, i64 %110
%119 = load double, ptr %118, align 8
%120 = fmul fast double %119, %103
%121 = fadd fast double %117, %120
%122 = getelementptr double, ptr @GlobJ, i64 %110
%123 = load double, ptr %122, align 8
%124 = fmul fast double %123, %105
%125 = fadd fast double %121, %124
; Store address in bytes from @GlobL:
;   8 * ((i - 1) * %21 + j) + 8000 * ((IL - 1) * %21 + JL) - 8008
%126 = add nsw i64 %JL, %107
%.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000
%gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us
%127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008
store double %125, ptr %127, align 8
%JL.next = add nuw nsw i64 %JL, 1
%exitcond.not = icmp eq i64 %JL, %21
br i1 %exitcond.not, label %IL.latch, label %JL.body
IL.latch:
%IL.next = add nuw nsw i64 %IL, 1
%exitcond320.not = icmp eq i64 %IL, %18
br i1 %exitcond320.not, label %j.latch, label %IL.header
j.latch:
%j.next = add nuw nsw i64 %j, 1
%exitcond324.not = icmp eq i64 %j, %21
br i1 %exitcond324.not, label %i.latch, label %j.header
i.latch:
%i.next = add nuw nsw i64 %i, 1
%exitcond328.not = icmp eq i64 %i, %18
br i1 %exitcond328.not, label %exit.i, label %i.header
._crit_edge.us:
%128 = add nuw nsw i64 %82, 1
%exitcond329.not = icmp eq i64 %128, %32
br i1 %exitcond329.not, label %.preheader260.us, label %.preheader258.us
._crit_edge279.us:
; Adds the block-57 reduction result to a @GlobM element and stores it
; through the @GlobK-derived pointer %53.
%.lcssa360 = phi double [ %67, %57 ]
%129 = getelementptr double, ptr @GlobM, i64 %55
%130 = load double, ptr %129, align 8
%131 = fadd fast double %130, %.lcssa360
%132 = getelementptr i8, ptr %53, i64 %.idx244.us
store double %131, ptr %132, align 8
%133 = add nuw nsw i64 %54, 1
%exitcond331.not = icmp eq i64 %133, %32
br i1 %exitcond331.not, label %latch.M.loopexit, label %.lr.ph278.us
L.latch:
%L.next = add nuw nsw i64 %L, 1
%exitcond339.not = icmp eq i64 %L, %18
br i1 %exitcond339.not, label %exit.L, label %L.header
exit.L:
br label %._crit_edge287
; Reached when the L loop nest was skipped or has finished. Allocates a
; temporary buffer %142 whose size is 8 * %137 bytes, or 1 byte when %135 < 1.
._crit_edge287:
%134 = load i32, ptr %6, align 4
%135 = load i32, ptr %5, align 4
%136 = tail call i32 @llvm.smax.i32(i32 %135, i32 0)
%137 = zext nneg i32 %136 to i64
%138 = sext i32 %134 to i64
%139 = getelementptr double, ptr %2, i64 %138
%140 = shl nuw nsw i64 %137, 3
%.not236 = icmp slt i32 %135, 1
%141 = select i1 %.not236, i64 1, i64 %140
%142 = tail call ptr @malloc(i64 %141)
br i1 %.not236, label %._crit_edge294, label %.preheader254.preheader
.preheader254.preheader:
call void @llvm.memset.p0.i64(ptr align 8 %142, i8 0, i64 %140, i1 false)
br label %.preheader254
; Two-level loop nest (.preheader254 / 148) accumulating into %142 the
; products of %11 elements with elements loaded via %0.
.preheader254:
%143 = phi i64 [ %160, %._crit_edge ], [ 1, %.preheader254.preheader ]
%144 = add nsw i64 %143, -1
%.idx240 = mul nuw nsw i64 %144, 8000
%145 = getelementptr i8, ptr %0, i64 %.idx240
%146 = getelementptr double, ptr %11, i64 %144
%147 = load double, ptr %146, align 8
br label %148
.preheader253:
br label %.lr.ph293
148:
%149 = phi i64 [ %137, %.preheader254 ], [ %159, %148 ]
%150 = phi i64 [ 1, %.preheader254 ], [ %158, %148 ]
%151 = add nsw i64 %150, -1
%152 = getelementptr double, ptr %142, i64 %151
%153 = load double, ptr %152, align 8
%154 = getelementptr double, ptr %145, i64 %151
%155 = load double, ptr %154, align 8
%156 = fmul fast double %147, %155
%157 = fadd fast double %156, %153
store double %157, ptr %152, align 8
%158 = add nuw nsw i64 %150, 1
%159 = add nsw i64 %149, -1
%.not239 = icmp eq i64 %159, 0
br i1 %.not239, label %._crit_edge, label %148
._crit_edge:
%160 = add nuw nsw i64 %143, 1
%exitcond341.not = icmp eq i64 %143, %137
br i1 %exitcond341.not, label %.preheader253, label %.preheader254
; Single-block loop subtracting each %142 element from the matching element
; addressed via %139 (argument %2 offset by %138), in place.
.lr.ph293:
%161 = phi i64 [ %170, %.lr.ph293 ], [ %137, %.preheader253 ]
%162 = phi i64 [ %169, %.lr.ph293 ], [ 1, %.preheader253 ]
%163 = add nsw i64 %162, -1
%164 = getelementptr double, ptr %139, i64 %163
%165 = getelementptr double, ptr %142, i64 %163
%166 = load double, ptr %165, align 8
%167 = load double, ptr %164, align 8
%168 = fsub fast double %167, %166
store double %168, ptr %164, align 8
%169 = add nuw nsw i64 %162, 1
%170 = add nsw i64 %161, -1
%.not238 = icmp eq i64 %170, 0
br i1 %.not238, label %._crit_edge294.loopexit359, label %.lr.ph293
; Path taken when the i32 loaded from %9 equals 1. Allocates a temporary
; buffer %179 of max(8 * %175 * %175, 1) bytes.
171:
%172 = load i32, ptr %6, align 4
%173 = load i32, ptr %5, align 4
%174 = tail call i32 @llvm.smax.i32(i32 %173, i32 0)
%175 = zext nneg i32 %174 to i64
%176 = shl nuw nsw i64 %175, 3
%177 = mul i64 %176, %175
%178 = tail call i64 @llvm.smax.i64(i64 %177, i64 1)
%179 = tail call ptr @malloc(i64 %178)
%.not311 = icmp slt i32 %173, 1
br i1 %.not311, label %._crit_edge294, label %.preheader250.us.preheader
.preheader250.us.preheader:
%180 = mul nuw nsw i64 %175, %175
%181 = shl i64 %180, 3
call void @llvm.memset.p0.i64(ptr align 8 %179, i8 0, i64 %181, i1 false)
br label %.preheader250.us
; Three-level loop nest (.preheader250.us / .preheader249.us / 185)
; accumulating into %179 the products of @GlobK elements with elements
; loaded via %0.
.preheader250.us:
%182 = phi i64 [ %203, %._crit_edge301.split.us ], [ 1, %.preheader250.us.preheader ]
%183 = add nsw i64 %182, -1
%.idx.us = mul nuw nsw i64 %183, 8000
%184 = getelementptr i8, ptr %0, i64 %.idx.us
%invariant.gep.us = getelementptr double, ptr @GlobK, i64 %183
br label %.preheader249.us
185:
%186 = phi i64 [ %175, %.preheader249.us ], [ %196, %185 ]
%187 = phi i64 [ 1, %.preheader249.us ], [ %195, %185 ]
%188 = add nsw i64 %187, -1
%189 = getelementptr double, ptr %200, i64 %188
%190 = load double, ptr %189, align 8
%191 = getelementptr double, ptr %184, i64 %188
%192 = load double, ptr %191, align 8
%193 = fmul fast double %201, %192
%194 = fadd fast double %193, %190
store double %194, ptr %189, align 8
%195 = add nuw nsw i64 %187, 1
%196 = add nsw i64 %186, -1
%.not233.us = icmp eq i64 %196, 0
br i1 %.not233.us, label %._crit_edge300.us, label %185
.preheader249.us:
%197 = phi i64 [ 1, %.preheader250.us ], [ %202, %._crit_edge300.us ]
%198 = add nsw i64 %197, -1
%199 = mul nuw nsw i64 %198, %175
%200 = getelementptr double, ptr %179, i64 %199
%.idx234.us = mul nuw nsw i64 %198, 8000
%gep.us = getelementptr i8, ptr %invariant.gep.us, i64 %.idx234.us
%201 = load double, ptr %gep.us, align 8
br label %185
._crit_edge300.us:
%202 = add nuw nsw i64 %197, 1
%exitcond344.not = icmp eq i64 %197, %175
br i1 %exitcond344.not, label %._crit_edge301.split.us, label %.preheader249.us
._crit_edge301.split.us:
%203 = add nuw nsw i64 %182, 1
%exitcond345.not = icmp eq i64 %182, %175
br i1 %exitcond345.not, label %.preheader248, label %.preheader250.us
.preheader248:
br label %.preheader.lr.ph
.preheader.lr.ph:
%204 = sext i32 %172 to i64
%invariant.gep306 = getelementptr double, ptr %3, i64 %204
br label %.preheader
; Two-level loop nest (.preheader / 211) subtracting each %179 element from
; the matching element addressed via argument %3, in place.
.preheader:
%205 = phi i64 [ 1, %.preheader.lr.ph ], [ %221, %._crit_edge304 ]
%206 = add nsw i64 %205, -1
%207 = add nsw i64 %206, %204
%208 = mul nsw i64 %207, %14
%gep307 = getelementptr double, ptr %invariant.gep306, i64 %208
%209 = mul nuw nsw i64 %206, %175
%210 = getelementptr double, ptr %179, i64 %209
br label %211
211:
%212 = phi i64 [ %175, %.preheader ], [ %220, %211 ]
%213 = phi i64 [ 1, %.preheader ], [ %219, %211 ]
%214 = add nsw i64 %213, -1
%gep = getelementptr double, ptr %gep307, i64 %214
%215 = getelementptr double, ptr %210, i64 %214
%216 = load double, ptr %215, align 8
%217 = load double, ptr %gep, align 8
%218 = fsub fast double %217, %216
store double %218, ptr %gep, align 8
%219 = add nuw nsw i64 %213, 1
%220 = add nsw i64 %212, -1
%.not232 = icmp eq i64 %220, 0
br i1 %.not232, label %._crit_edge304, label %211
._crit_edge304:
%221 = add nuw nsw i64 %205, 1
%exitcond347.not = icmp eq i64 %205, %175
br i1 %exitcond347.not, label %._crit_edge294.loopexit, label %.preheader
._crit_edge294.loopexit:
br label %._crit_edge294
._crit_edge294.loopexit359:
br label %._crit_edge294
; Common exit: %.sink selects the buffer malloc'ed on the path taken
; (%142 or %179) and frees it.
._crit_edge294:
%.sink = phi ptr [ %142, %._crit_edge287 ], [ %179, %171 ], [ %179, %._crit_edge294.loopexit ], [ %142, %._crit_edge294.loopexit359 ]
tail call void @free(ptr %.sink)
ret void
}
; Declarations of the intrinsics and the external @free/@malloc functions
; called by @test.
declare i64 @llvm.smax.i64(i64, i64)
declare i32 @llvm.smax.i32(i32, i32)
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg)
declare void @free(ptr allocptr noundef captures(none)) local_unnamed_addr
declare noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr