; llvm-project/llvm/test/Transforms/LoopInterchange/large-nested-6d.ll

; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -pass-remarks='loop-interchange' -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -disable-output -S
; RUN: FileCheck --input-file=%t %s

; Module data layout. Per the LLVM LangRef layout-string syntax: 'e' selects
; little-endian, 'm:e' ELF-style name mangling, 'n32:64' native integer widths
; of 32 and 64 bits, and 'S128' a 128-bit natural stack alignment.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"

; The IR test case below is a full and representative motivating example
; for loop-interchange containing a more complex loop nest structure that
; corresponds to this pseudo-code:
;
;      for L=1 to NX
;       for M=1 to NY
;        for i=1 to NX
;         for j=1 to NY
;          for IL=1 to NX
;           load GlobC(i,IL,L)
;           load GlobG(i,IL,L)
;           load GlobE(i,IL,L)
;           load GlobI(i,IL,L)
;           for JL=1 to NY
;            load GlobD(j,JL,M)
;            load GlobH(j,JL,M)
;            load GlobF(j,JL,M)
;            load GlobJ(j,JL,M)
;            store GlobL(NY*i+j,NY*IL+JL)
;           End
;          End
;         End
;        End
;        // Stmt 2
;        // Stmt 3
;        // Stmt 4
;      End
;     End
;
; It is important to note here that this comes from Fortran code, which uses a
; column-major data layout, so loops 'j' and 'JL' should be interchanged. I.e.
; in the IR below, basic block JL.body is part of the loop that we would like
; to see interchanged, as there are 4 loads and 1 store that are unit-strided
; over 'j', so making the 'j' loop the innermost is preferable here.
;
; TODO:
;
; There are a few issues that prevent loop-interchange from performing its
; transformation on this test case:
;
; 1. LoopNest checks: the first check that is performed is whether loops
;    'L.header' and 'M.header' are perfectly nested, which they are not. It
;    needs to be investigated why the whole loop nest rooted under L is
;    rejected as a candidate.
;
; 2. DependenceAnalysis: it finds this dependency:
;
;    Found output dependency between Src and Dst
;      Src:  store double %46, ptr %48, align 8
;      Dst:  store double %46, ptr %48, align 8
;
;
; CHECK:       --- !Missed
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            UnsupportedLoopNestDepth
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          'Unsupported depth of loop nest, the supported range is ['
; CHECK-NEXT:    - String:          '2'
; CHECK-NEXT:    - String:          ', '
; CHECK-NEXT:    - String:          '10'
; CHECK-NEXT:    - String:          "].\n"
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Analysis
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            Dependence
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          Computed dependence info, invoking the transform.
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Missed
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            Dependence
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          Cannot interchange loops due to dependences.
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Missed
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            UnsupportedLoopNestDepth
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          'Unsupported depth of loop nest, the supported range is ['
; CHECK-NEXT:    - String:          '2'
; CHECK-NEXT:    - String:          ', '
; CHECK-NEXT:    - String:          '10'
; CHECK-NEXT:    - String:          "].\n"
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Analysis
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            Dependence
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          Computed dependence info, invoking the transform.
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Missed
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            Dependence
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          All loops have dependencies in all directions.
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Analysis
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            Dependence
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          Computed dependence info, invoking the transform.
; CHECK-NEXT:  ...
; CHECK-NEXT:  --- !Missed
; CHECK-NEXT:  Pass:            loop-interchange
; CHECK-NEXT:  Name:            Dependence
; CHECK-NEXT:  Function:        test
; CHECK-NEXT:  Args:
; CHECK-NEXT:    - String:          All loops have dependencies in all directions.
; CHECK-NEXT:  ...

; Global arrays accessed by @test.
;   GlobC/GlobG/GlobE/GlobI : loaded in block IL.header.
;   GlobD/GlobH/GlobF/GlobJ : loaded in block JL.body.
;   GlobL                   : stored to in JL.body, read in blocks %57 and %70.
;   GlobK                   : written in ._crit_edge279.us, read in
;                             .preheader249.us.
;   GlobM                   : zeroed by memset, then updated in block %70 and
;                             read in .lr.ph274.us and ._crit_edge279.us.
@GlobC = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobD = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobE = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobF = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobG = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobH = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobI = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobJ = local_unnamed_addr global [54 x [54 x [54 x double]]] zeroinitializer
@GlobK = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer
@GlobL = local_unnamed_addr global [1000 x [1000 x double]] zeroinitializer
@GlobM = local_unnamed_addr global [2500 x double] zeroinitializer

; Function under test. Overview of the control flow visible below:
;   * entry   : reads an i32 through %9; value 1 selects block %171, any other
;               value selects block %16.
;   * %16 ... : guarded entry into the 6-deep loop nest L / M / i / j / IL / JL
;               (blocks L.header .. JL.body), followed inside the M loop by
;               additional loops over GlobM, GlobL and GlobK.
;   * ._crit_edge287 and %171 : two alternative tails, each of which mallocs a
;               temporary buffer, accumulates into it, and subtracts it from
;               memory reached through %2 or %3 respectively.
;   * ._crit_edge294 : common exit that frees the temporary buffer.
; Loops L, i and IL run until their induction variable equals %18 (the value
; loaded through %7); loops M, j and JL run until it equals %21 (loaded
; through %8). All six induction variables start at 1.
define void @test(ptr noalias readonly captures(none) %0, ptr noalias readonly captures(none) %1, ptr noalias captures(none) %2, ptr noalias captures(none) %3, ptr noalias readonly captures(none) %4, ptr noalias readonly captures(none) %5, ptr noalias readonly captures(none) %6, ptr noalias readonly captures(none) %7, ptr noalias readonly captures(none) %8, ptr noalias readonly captures(none) %9) {
  ; Stack scratch buffer of 2500 doubles: written inside the M loop
  ; (.lr.ph278.us.preheader / ._crit_edge275.us.thread), read in .preheader254.
  %11 = alloca [2500 x double], align 8
  ; %14 = max(*%4, 0) as i64; its only use is the multiply in block .preheader.
  %12 = load i32, ptr %4, align 4
  %13 = tail call i32 @llvm.smax.i32(i32 %12, i32 0)
  %14 = zext nneg i32 %13 to i64
  ; Path selector: *%9 == 1 goes to block %171, otherwise to block %16.
  %15 = load i32, ptr %9, align 4
  %.not = icmp eq i32 %15, 1
  br i1 %.not, label %171, label %16

; Guard 1 of 2 for the loop nest: requires *%7 > 0. %18 is the sign-extended
; value and serves as the exit bound of the L, i and IL loops.
16:
  %17 = load i32, ptr %7, align 4
  %18 = sext i32 %17 to i64
  %19 = icmp sgt i32 %17, 0
  br i1 %19, label %.lr.ph286, label %._crit_edge287

; Guard 2 of 2: requires *%8 > 0. %21 is the exit bound of the M, j and JL
; loops and is also used as a multiplier in the index computations below.
.lr.ph286:
  %20 = load i32, ptr %8, align 4
  %21 = sext i32 %20 to i64
  %22 = icmp sgt i32 %20, 0
  br i1 %22, label %preheader.L, label %._crit_edge287

; Values invariant across the whole loop nest:
;   %25 = max(*%5, 0) as i64 (trip count of the down-counting inner loops)
;   %28 = %1 advanced by *%6 doubles
;   %29 = %25 * 8, the byte count passed to the memset of GlobM
;   %32 = %25 + 2 + (-1 if *%5 > 0 else 0), exit bound compared against the
;         incremented counters %128 and %133
preheader.L:
  %23 = load i32, ptr %5, align 4
  %24 = tail call i32 @llvm.smax.i32(i32 %23, i32 0)
  %25 = zext nneg i32 %24 to i64
  %26 = load i32, ptr %6, align 4
  %27 = sext i32 %26 to i64
  %28 = getelementptr double, ptr %1, i64 %27
  %.not241270.us = icmp slt i32 %23, 1
  %29 = shl nuw nsw i64 %25, 3
  %30 = add nuw nsw i64 %25, 2
  %31 = icmp sgt i32 %23, 0
  %.neg = sext i1 %31 to i64
  %32 = add nsw i64 %30, %.neg
  br label %L.header

; Outermost loop 'L'. %34 = 2916*L - 2971 is the L-dependent part of the
; element index used in IL.header; %36 = (L-1) * %21 offsets into %11 / GlobK.
L.header:
  %L = phi i64 [ %L.next, %L.latch ], [ 1, %preheader.L ]
  %33 = mul nuw nsw i64 %L, 2916
  %34 = add nsw i64 %33, -2971
  %35 = add nsw i64 %L, -1
  %36 = mul nsw i64 %35, %21
  br label %M.header

; Reached when the 'i' loop finishes (still inside the M loop). When *%5 < 1
; the follow-up loops are skipped and only a zero is stored.
exit.i:
  br i1 %.not241270.us, label %._crit_edge275.us.thread, label %.preheader258.us.preheader

; Reduction loop: %38 accumulates %28[k-1] * GlobM[k-1] for k = %39 starting
; at 1; %37 counts down from %25 to 0.
.lr.ph274.us:
  %37 = phi i64 [ %48, %.lr.ph274.us ], [ %25, %.preheader260.us ]
  %38 = phi double [ %46, %.lr.ph274.us ], [ 0.000000e+00, %.preheader260.us ]
  %39 = phi i64 [ %47, %.lr.ph274.us ], [ 1, %.preheader260.us ]
  %40 = add nsw i64 %39, -1
  %41 = getelementptr double, ptr %28, i64 %40
  %42 = load double, ptr %41, align 8
  %43 = getelementptr double, ptr @GlobM, i64 %40
  %44 = load double, ptr %43, align 8
  %45 = fmul fast double %44, %42
  %46 = fadd fast double %45, %38
  %47 = add nuw nsw i64 %39, 1
  %48 = add nsw i64 %37, -1
  %.not242.us = icmp eq i64 %48, 0
  br i1 %.not242.us, label %.lr.ph278.us.preheader, label %.lr.ph274.us

; Stores the reduction result to %11 at double index (M + %36), minus 8 bytes,
; and precomputes %53, the matching base address inside GlobK.
.lr.ph278.us.preheader:
  %.lcssa = phi double [ %46, %.lr.ph274.us ]
  %49 = add nsw i64 %M, %36
  %50 = getelementptr double, ptr %11, i64 %49
  %51 = getelementptr i8, ptr %50, i64 -8
  store double %.lcssa, ptr %51, align 8
  %52 = getelementptr double, ptr @GlobK, i64 %49
  %53 = getelementptr i8, ptr %52, i64 -8
  br label %.lr.ph278.us

latch.M.loopexit:
  br label %latch.M

; Latch of loop 'M': exits to L.latch when the pre-increment M equals %21.
latch.M:
  %M.next = add nuw nsw i64 %M, 1
  %exitcond335.not = icmp eq i64 %M, %21
  br i1 %exitcond335.not, label %L.latch, label %M.header

; Outer loop over %54 (starting at 1): %56 addresses GlobL at byte offset
; (%54 - 1) * 8000, i.e. one [1000 x double] row per iteration.
.lr.ph278.us:
  %54 = phi i64 [ %133, %._crit_edge279.us ], [ 1, %.lr.ph278.us.preheader ]
  %55 = add nsw i64 %54, -1
  %.idx244.us = mul nuw nsw i64 %55, 8000
  %56 = getelementptr i8, ptr @GlobL, i64 %.idx244.us
  br label %57

; Inner reduction: %59 accumulates %56[k-1] * %28[k-1]; %58 counts down from
; %25 to 0.
57:
  %58 = phi i64 [ %25, %.lr.ph278.us ], [ %69, %57 ]
  %59 = phi double [ 0.000000e+00, %.lr.ph278.us ], [ %67, %57 ]
  %60 = phi i64 [ 1, %.lr.ph278.us ], [ %68, %57 ]
  %61 = add nsw i64 %60, -1
  %62 = getelementptr double, ptr %56, i64 %61
  %63 = load double, ptr %62, align 8
  %64 = getelementptr double, ptr %28, i64 %61
  %65 = load double, ptr %64, align 8
  %66 = fmul fast double %65, %63
  %67 = fadd fast double %66, %59
  %68 = add nuw nsw i64 %60, 1
  %69 = add nsw i64 %58, -1
  %.not243.us = icmp eq i64 %69, 0
  br i1 %.not243.us, label %._crit_edge279.us, label %57

; Inner update loop: GlobM[k-1] += %86 * %84[k-1], where %84 and %86 are set
; up per outer iteration in .preheader258.us; %71 counts down from %25 to 0.
70:
  %71 = phi i64 [ %25, %.preheader258.us ], [ %81, %70 ]
  %72 = phi i64 [ 1, %.preheader258.us ], [ %80, %70 ]
  %73 = add nsw i64 %72, -1
  %74 = getelementptr double, ptr @GlobM, i64 %73
  %75 = load double, ptr %74, align 8
  %76 = getelementptr double, ptr %84, i64 %73
  %77 = load double, ptr %76, align 8
  %78 = fmul fast double %86, %77
  %79 = fadd fast double %78, %75
  store double %79, ptr %74, align 8
  %80 = add nuw nsw i64 %72, 1
  %81 = add nsw i64 %71, -1
  %.not245.us = icmp eq i64 %81, 0
  br i1 %.not245.us, label %._crit_edge.us, label %70

; Outer loop over %82 (starting at 1): %84 addresses GlobL at byte offset
; (%82 - 1) * 8000 and %86 is the scalar %28[%82 - 1].
.preheader258.us:
  %82 = phi i64 [ %128, %._crit_edge.us ], [ 1, %.preheader258.us.preheader ]
  %83 = add nsw i64 %82, -1
  %.idx246.us = mul nuw nsw i64 %83, 8000
  %84 = getelementptr i8, ptr @GlobL, i64 %.idx246.us
  %85 = getelementptr double, ptr %28, i64 %83
  %86 = load double, ptr %85, align 8
  br label %70

.preheader260.us:
  br label %.lr.ph274.us

; Taken when *%5 < 1: stores 0.0 to the same %11 slot that
; .lr.ph278.us.preheader writes (double index M + %36, minus 8 bytes).
._crit_edge275.us.thread:
  %87 = getelementptr double, ptr %11, i64 %M
  %88 = getelementptr double, ptr %87, i64 %36
  %89 = getelementptr i8, ptr %88, i64 -8
  store double 0.000000e+00, ptr %89, align 8
  br label %latch.M

; Zeroes the first %29 bytes (%25 doubles) of GlobM before the update loops.
.preheader258.us.preheader:
  call void @llvm.memset.p0.i64(ptr nonnull align 16 @GlobM, i8 0, i64 %29, i1 false)
  br label %.preheader258.us

; Loop 'M'. %91 = 2916*M - 2971 is the M-dependent part of the element index
; used in JL.body.
M.header:
  %M = phi i64 [ 1, %L.header ], [ %M.next, %latch.M ]
  %90 = mul nuw nsw i64 %M, 2916
  %91 = add nsw i64 %90, -2971
  br label %i.header

; Loop 'i'. %92 = %34 + i; %invariant.gep = GlobL + (i-1) * %21 doubles.
i.header:
  %i = phi i64 [ %i.next, %i.latch ], [ 1, %M.header ]
  %92 = add nsw i64 %34, %i
  %93 = add nsw i64 %i, -1
  %94 = mul nsw i64 %93, %21
  %invariant.gep = getelementptr double, ptr @GlobL, i64 %94
  br label %j.header

; Loop 'j'. %95 = %91 + j; %gep358 = %invariant.gep + j doubles.
j.header:
  %j = phi i64 [ %j.next, %j.latch ], [ 1, %i.header ]
  %95 = add nsw i64 %91, %j
  %gep358 = getelementptr double, ptr %invariant.gep, i64 %j
  br label %IL.header

; Loop 'IL'. Loads one element each from GlobC, GlobG, GlobE and GlobI at
; flat double index %97 = i + 54*IL + 2916*L - 2971; these four values are
; invariant in the JL loop. %107 = (IL-1) * %21 feeds the store address.
IL.header:
  %IL = phi i64 [ %IL.next, %IL.latch ], [ 1, %j.header ]
  %96 = mul nuw nsw i64 %IL, 54
  %97 = add nsw i64 %92, %96
  %98 = getelementptr double, ptr @GlobC, i64 %97
  %99 = load double, ptr %98, align 8
  %100 = getelementptr double, ptr @GlobG, i64 %97
  %101 = load double, ptr %100, align 8
  %102 = getelementptr double, ptr @GlobE, i64 %97
  %103 = load double, ptr %102, align 8
  %104 = getelementptr double, ptr @GlobI, i64 %97
  %105 = load double, ptr %104, align 8
  %106 = add nsw i64 %IL, -1
  %107 = mul nsw i64 %106, %21
  br label %JL.body

; Innermost loop 'JL' (single-block loop). Loads GlobD, GlobH, GlobF and GlobJ
; at flat double index %110 = j + 54*JL + 2916*M - 2971, forms
;   D*C + H*G + F*E + J*I
; and stores it at %gep358 + (JL + %107) * 8000 bytes - 8008 bytes, i.e. the
; JL step moves the store address by 8000 bytes while the j step (via %gep358)
; moves it by one double.
JL.body:
  %JL = phi i64 [ %JL.next, %JL.body ], [ 1, %IL.header ]
  %109 = mul nuw nsw i64 %JL, 54
  %110 = add nsw i64 %95, %109
  %111 = getelementptr double, ptr @GlobD, i64 %110
  %112 = load double, ptr %111, align 8
  %113 = fmul fast double %112, %99
  %114 = getelementptr double, ptr @GlobH, i64 %110
  %115 = load double, ptr %114, align 8
  %116 = fmul fast double %115, %101
  %117 = fadd fast double %116, %113
  %118 = getelementptr double, ptr @GlobF, i64 %110
  %119 = load double, ptr %118, align 8
  %120 = fmul fast double %119, %103
  %121 = fadd fast double %117, %120
  %122 = getelementptr double, ptr @GlobJ, i64 %110
  %123 = load double, ptr %122, align 8
  %124 = fmul fast double %123, %105
  %125 = fadd fast double %121, %124
  %126 = add nsw i64 %JL, %107
  %.idx247.us.us.us.us.us.us = mul nsw i64 %126, 8000
  %gep.us.us.us.us.us.us = getelementptr i8, ptr %gep358, i64 %.idx247.us.us.us.us.us.us
  %127 = getelementptr i8, ptr %gep.us.us.us.us.us.us, i64 -8008
  store double %125, ptr %127, align 8
  %JL.next = add nuw nsw i64 %JL, 1
  %exitcond.not = icmp eq i64 %JL, %21
  br i1 %exitcond.not, label %IL.latch, label %JL.body

; Latches of the IL, j and i loops. Each compares the pre-increment induction
; variable against its bound (%18 for IL and i, %21 for j).
IL.latch:
  %IL.next = add nuw nsw i64 %IL, 1
  %exitcond320.not = icmp eq i64 %IL, %18
  br i1 %exitcond320.not, label %j.latch, label %IL.header

j.latch:
  %j.next = add nuw nsw i64 %j, 1
  %exitcond324.not = icmp eq i64 %j, %21
  br i1 %exitcond324.not, label %i.latch, label %j.header

i.latch:
  %i.next = add nuw nsw i64 %i, 1
  %exitcond328.not = icmp eq i64 %i, %18
  br i1 %exitcond328.not, label %exit.i, label %i.header

; Latch of the .preheader258.us loop: exits when the incremented counter
; reaches %32.
._crit_edge.us:
  %128 = add nuw nsw i64 %82, 1
  %exitcond329.not = icmp eq i64 %128, %32
  br i1 %exitcond329.not, label %.preheader260.us, label %.preheader258.us

; Latch of the .lr.ph278.us loop: adds GlobM[%54 - 1] to the inner reduction
; result and stores the sum at %53 + (%54 - 1) * 8000 bytes (inside GlobK).
._crit_edge279.us:
  %.lcssa360 = phi double [ %67, %57 ]
  %129 = getelementptr double, ptr @GlobM, i64 %55
  %130 = load double, ptr %129, align 8
  %131 = fadd fast double %130, %.lcssa360
  %132 = getelementptr i8, ptr %53, i64 %.idx244.us
  store double %131, ptr %132, align 8
  %133 = add nuw nsw i64 %54, 1
  %exitcond331.not = icmp eq i64 %133, %32
  br i1 %exitcond331.not, label %latch.M.loopexit, label %.lr.ph278.us

; Latch of loop 'L': exits when the pre-increment L equals %18.
L.latch:
  %L.next = add nuw nsw i64 %L, 1
  %exitcond339.not = icmp eq i64 %L, %18
  br i1 %exitcond339.not, label %exit.L, label %L.header

exit.L:
  br label %._crit_edge287

; Tail for the *%9 != 1 path (also reached directly when either guard above
; fails). Reloads *%6 and *%5, allocates %142 with size
; (*%5 < 1) ? 1 : max(*%5, 0) * 8 bytes, and skips the loops when *%5 < 1.
._crit_edge287:
  %134 = load i32, ptr %6, align 4
  %135 = load i32, ptr %5, align 4
  %136 = tail call i32 @llvm.smax.i32(i32 %135, i32 0)
  %137 = zext nneg i32 %136 to i64
  %138 = sext i32 %134 to i64
  %139 = getelementptr double, ptr %2, i64 %138
  %140 = shl nuw nsw i64 %137, 3
  %.not236 = icmp slt i32 %135, 1
  %141 = select i1 %.not236, i64 1, i64 %140
  %142 = tail call ptr @malloc(i64 %141)
  br i1 %.not236, label %._crit_edge294, label %.preheader254.preheader

; Zero-initialises the %140 bytes of the temporary buffer.
.preheader254.preheader:
  call void @llvm.memset.p0.i64(ptr align 8 %142, i8 0, i64 %140, i1 false)
  br label %.preheader254

; Outer loop over %143 (starting at 1): %145 = %0 + (%143 - 1) * 8000 bytes,
; %147 = %11[%143 - 1].
.preheader254:
  %143 = phi i64 [ %160, %._crit_edge ], [ 1, %.preheader254.preheader ]
  %144 = add nsw i64 %143, -1
  %.idx240 = mul nuw nsw i64 %144, 8000
  %145 = getelementptr i8, ptr %0, i64 %.idx240
  %146 = getelementptr double, ptr %11, i64 %144
  %147 = load double, ptr %146, align 8
  br label %148

.preheader253:
  br label %.lr.ph293

; Inner loop: %142[k-1] += %147 * %145[k-1]; %149 counts down from %137 to 0.
148:
  %149 = phi i64 [ %137, %.preheader254 ], [ %159, %148 ]
  %150 = phi i64 [ 1, %.preheader254 ], [ %158, %148 ]
  %151 = add nsw i64 %150, -1
  %152 = getelementptr double, ptr %142, i64 %151
  %153 = load double, ptr %152, align 8
  %154 = getelementptr double, ptr %145, i64 %151
  %155 = load double, ptr %154, align 8
  %156 = fmul fast double %147, %155
  %157 = fadd fast double %156, %153
  store double %157, ptr %152, align 8
  %158 = add nuw nsw i64 %150, 1
  %159 = add nsw i64 %149, -1
  %.not239 = icmp eq i64 %159, 0
  br i1 %.not239, label %._crit_edge, label %148

; Latch of the .preheader254 loop: exits when the pre-increment %143 equals
; %137.
._crit_edge:
  %160 = add nuw nsw i64 %143, 1
  %exitcond341.not = icmp eq i64 %143, %137
  br i1 %exitcond341.not, label %.preheader253, label %.preheader254

; Final loop of this path: %139[k-1] -= %142[k-1], where %139 is %2 advanced
; by *%6 doubles; %161 counts down from %137 to 0.
.lr.ph293:
  %161 = phi i64 [ %170, %.lr.ph293 ], [ %137, %.preheader253 ]
  %162 = phi i64 [ %169, %.lr.ph293 ], [ 1, %.preheader253 ]
  %163 = add nsw i64 %162, -1
  %164 = getelementptr double, ptr %139, i64 %163
  %165 = getelementptr double, ptr %142, i64 %163
  %166 = load double, ptr %165, align 8
  %167 = load double, ptr %164, align 8
  %168 = fsub fast double %167, %166
  store double %168, ptr %164, align 8
  %169 = add nuw nsw i64 %162, 1
  %170 = add nsw i64 %161, -1
  %.not238 = icmp eq i64 %170, 0
  br i1 %.not238, label %._crit_edge294.loopexit359, label %.lr.ph293

; Tail for the *%9 == 1 path. With %175 = max(*%5, 0), allocates %179 with
; size max(%175 * %175 * 8, 1) bytes and skips the loops when *%5 < 1.
171:
  %172 = load i32, ptr %6, align 4
  %173 = load i32, ptr %5, align 4
  %174 = tail call i32 @llvm.smax.i32(i32 %173, i32 0)
  %175 = zext nneg i32 %174 to i64
  %176 = shl nuw nsw i64 %175, 3
  %177 = mul i64 %176, %175
  %178 = tail call i64 @llvm.smax.i64(i64 %177, i64 1)
  %179 = tail call ptr @malloc(i64 %178)
  %.not311 = icmp slt i32 %173, 1
  br i1 %.not311, label %._crit_edge294, label %.preheader250.us.preheader

; Zero-initialises the %175 * %175 doubles of the temporary buffer.
.preheader250.us.preheader:
  %180 = mul nuw nsw i64 %175, %175
  %181 = shl i64 %180, 3
  call void @llvm.memset.p0.i64(ptr align 8 %179, i8 0, i64 %181, i1 false)
  br label %.preheader250.us

; Outermost loop of a 3-deep nest, over %182 (starting at 1):
; %184 = %0 + (%182 - 1) * 8000 bytes; %invariant.gep.us = GlobK + (%182 - 1)
; doubles.
.preheader250.us:
  %182 = phi i64 [ %203, %._crit_edge301.split.us ], [ 1, %.preheader250.us.preheader ]
  %183 = add nsw i64 %182, -1
  %.idx.us = mul nuw nsw i64 %183, 8000
  %184 = getelementptr i8, ptr %0, i64 %.idx.us
  %invariant.gep.us = getelementptr double, ptr @GlobK, i64 %183
  br label %.preheader249.us

; Innermost loop: %200[k-1] += %201 * %184[k-1]; %186 counts down from %175
; to 0.
185:
  %186 = phi i64 [ %175, %.preheader249.us ], [ %196, %185 ]
  %187 = phi i64 [ 1, %.preheader249.us ], [ %195, %185 ]
  %188 = add nsw i64 %187, -1
  %189 = getelementptr double, ptr %200, i64 %188
  %190 = load double, ptr %189, align 8
  %191 = getelementptr double, ptr %184, i64 %188
  %192 = load double, ptr %191, align 8
  %193 = fmul fast double %201, %192
  %194 = fadd fast double %193, %190
  store double %194, ptr %189, align 8
  %195 = add nuw nsw i64 %187, 1
  %196 = add nsw i64 %186, -1
  %.not233.us = icmp eq i64 %196, 0
  br i1 %.not233.us, label %._crit_edge300.us, label %185

; Middle loop over %197 (starting at 1): %200 = %179 + (%197 - 1) * %175
; doubles; %201 is loaded from %invariant.gep.us + (%197 - 1) * 8000 bytes.
.preheader249.us:
  %197 = phi i64 [ 1, %.preheader250.us ], [ %202, %._crit_edge300.us ]
  %198 = add nsw i64 %197, -1
  %199 = mul nuw nsw i64 %198, %175
  %200 = getelementptr double, ptr %179, i64 %199
  %.idx234.us = mul nuw nsw i64 %198, 8000
  %gep.us = getelementptr i8, ptr %invariant.gep.us, i64 %.idx234.us
  %201 = load double, ptr %gep.us, align 8
  br label %185

; Latches of the middle and outermost loops; both exit when the pre-increment
; counter equals %175.
._crit_edge300.us:
  %202 = add nuw nsw i64 %197, 1
  %exitcond344.not = icmp eq i64 %197, %175
  br i1 %exitcond344.not, label %._crit_edge301.split.us, label %.preheader249.us

._crit_edge301.split.us:
  %203 = add nuw nsw i64 %182, 1
  %exitcond345.not = icmp eq i64 %182, %175
  br i1 %exitcond345.not, label %.preheader248, label %.preheader250.us

.preheader248:
  br label %.preheader.lr.ph

; %invariant.gep306 = %3 advanced by *%6 doubles (%172 sign-extended).
.preheader.lr.ph:
  %204 = sext i32 %172 to i64
  %invariant.gep306 = getelementptr double, ptr %3, i64 %204
  br label %.preheader

; Outer loop over %205 (starting at 1):
;   %gep307 = %invariant.gep306 + ((%205 - 1) + %204) * %14 doubles
;   %210    = %179 + (%205 - 1) * %175 doubles
.preheader:
  %205 = phi i64 [ 1, %.preheader.lr.ph ], [ %221, %._crit_edge304 ]
  %206 = add nsw i64 %205, -1
  %207 = add nsw i64 %206, %204
  %208 = mul nsw i64 %207, %14
  %gep307 = getelementptr double, ptr %invariant.gep306, i64 %208
  %209 = mul nuw nsw i64 %206, %175
  %210 = getelementptr double, ptr %179, i64 %209
  br label %211

; Inner loop: %gep307[k-1] -= %210[k-1]; %212 counts down from %175 to 0.
211:
  %212 = phi i64 [ %175, %.preheader ], [ %220, %211 ]
  %213 = phi i64 [ 1, %.preheader ], [ %219, %211 ]
  %214 = add nsw i64 %213, -1
  %gep = getelementptr double, ptr %gep307, i64 %214
  %215 = getelementptr double, ptr %210, i64 %214
  %216 = load double, ptr %215, align 8
  %217 = load double, ptr %gep, align 8
  %218 = fsub fast double %217, %216
  store double %218, ptr %gep, align 8
  %219 = add nuw nsw i64 %213, 1
  %220 = add nsw i64 %212, -1
  %.not232 = icmp eq i64 %220, 0
  br i1 %.not232, label %._crit_edge304, label %211

; Latch of the .preheader loop: exits when the pre-increment %205 equals %175.
._crit_edge304:
  %221 = add nuw nsw i64 %205, 1
  %exitcond347.not = icmp eq i64 %205, %175
  br i1 %exitcond347.not, label %._crit_edge294.loopexit, label %.preheader

._crit_edge294.loopexit:
  br label %._crit_edge294

._crit_edge294.loopexit359:
  br label %._crit_edge294

; Common exit: %.sink is whichever malloc result (%142 or %179) belongs to the
; path taken; it is freed before returning.
._crit_edge294:
  %.sink = phi ptr [ %142, %._crit_edge287 ], [ %179, %171 ], [ %179, %._crit_edge294.loopexit ], [ %142, %._crit_edge294.loopexit359 ]
  tail call void @free(ptr %.sink)
  ret void
}

; External declarations used by @test.
; Signed-maximum intrinsics: used to clamp loaded i32 sizes to >= 0 and the
; i64 allocation size in block %171 to >= 1.
declare i64 @llvm.smax.i64(i64, i64)
declare i32 @llvm.smax.i32(i32, i32)
; memset intrinsic: used to zero GlobM and the malloc'ed temporary buffers.
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg)
; Allocation functions for the temporary buffer released in ._crit_edge294.
declare void @free(ptr allocptr noundef captures(none)) local_unnamed_addr
declare noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr