[SCEVExp] Use Builder.CreateBinOp in InsertBinOp. (#154148)
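SCEVExpander's builder already uses InstSimplifyFolder. Use the builder to construct binary ops in InsertBinop (through the CreateNoWrapBinOp helper added in this change, which consults the folder before creating an instruction) instead of calling BinaryOperator::Create directly. Post-inc rewrites expanded in LSRMode still create the instruction directly so that the rewrites are preserved. This helps to simplify away a few more instructions during SCEV expansion. PR: https://github.com/llvm/llvm-project/pull/154148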
This commit is contained in:
@@ -1762,6 +1762,18 @@ public:
|
||||
return Insert(BinOp, Name);
|
||||
}
|
||||
|
||||
/// Create the binary operation \p Opc on \p LHS and \p RHS, optionally marked
/// no-unsigned-wrap (\p IsNUW) and/or no-signed-wrap (\p IsNSW).
///
/// The folder is consulted first through FoldNoWrapBinOp. If it produces a
/// value, that value is returned as-is and no new instruction is created, so
/// the result is not necessarily a freshly inserted BinaryOperator. Otherwise
/// a new BinaryOperator is built, the requested wrap flags are set on it, and
/// it is passed to Insert() together with \p Name.
Value *CreateNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
|
||||
bool IsNUW, bool IsNSW, const Twine &Name = "") {
|
||||
// Let the folder try to produce the result without a new instruction.
if (Value *V = Folder.FoldNoWrapBinOp(Opc, LHS, RHS, IsNUW, IsNSW))
|
||||
return V;
|
||||
// Folding failed: materialize the operation explicitly.
Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
|
||||
// Only touch the wrap flags that were actually requested.
if (IsNUW)
|
||||
BinOp->setHasNoUnsignedWrap(IsNUW);
|
||||
if (IsNSW)
|
||||
BinOp->setHasNoSignedWrap(IsNSW);
|
||||
return Insert(BinOp, Name);
|
||||
}
|
||||
|
||||
Value *CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name = "",
|
||||
Instruction *MDFrom = nullptr) {
|
||||
assert(Cond2->getType()->isIntOrIntVectorTy(1));
|
||||
|
||||
@@ -337,16 +337,23 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
|
||||
}
|
||||
|
||||
// If we haven't found this binop, insert it.
|
||||
// TODO: Use the Builder, which will make CreateBinOp below fold with
|
||||
// InstSimplifyFolder.
|
||||
Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
|
||||
BO->setDebugLoc(Loc);
|
||||
if (any(Flags & SCEV::FlagNUW))
|
||||
Builder.SetCurrentDebugLocation(Loc);
|
||||
bool IsNUW = any(Flags & SCEV::FlagNUW);
|
||||
bool IsNSW = any(Flags & SCEV::FlagNSW);
|
||||
// Don't use folder when expanding post-inc rewrites in LSRMode to preserve
|
||||
// the rewrites.
|
||||
if (LSRMode && !PostIncLoops.empty() &&
|
||||
all_of(PostIncLoops, [&](const Loop *L) {
|
||||
return !L->contains(Builder.GetInsertBlock());
|
||||
})) {
|
||||
auto *BO = BinaryOperator::Create(Opcode, LHS, RHS);
|
||||
if (IsNUW)
|
||||
BO->setHasNoUnsignedWrap();
|
||||
if (any(Flags & SCEV::FlagNSW))
|
||||
if (IsNSW)
|
||||
BO->setHasNoSignedWrap();
|
||||
|
||||
return BO;
|
||||
return Builder.Insert(BO);
|
||||
}
|
||||
return Builder.CreateNoWrapBinOp(Opcode, LHS, RHS, IsNUW, IsNSW);
|
||||
}
|
||||
|
||||
/// expandAddToGEP - Expand an addition expression with a pointer type into
|
||||
@@ -1344,7 +1351,9 @@ Value *SCEVExpander::visitAddRecExpr(SCEVUseT<const SCEVAddRecExpr *> S) {
|
||||
Value *V = expand(
|
||||
SE.getAddRecExpr(NewOps, S->getLoop(), S.getNoWrapFlags(SCEV::FlagNW)));
|
||||
BasicBlock::iterator NewInsertPt =
|
||||
findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
|
||||
isa<Instruction>(V) ? findInsertPointAfter(cast<Instruction>(V),
|
||||
&*Builder.GetInsertPoint())
|
||||
: Builder.GetInsertPoint();
|
||||
V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
|
||||
return V;
|
||||
}
|
||||
|
||||
@@ -160,10 +160,9 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
|
||||
; CHECK-NEXT: .LBB4_5: // %LJ.latch
|
||||
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
|
||||
; CHECK-NEXT: add x8, x21, #1
|
||||
; CHECK-NEXT: cmp x21, x19
|
||||
; CHECK-NEXT: str w0, [x20, x21, lsl #2]
|
||||
; CHECK-NEXT: sub x9, x8, #1
|
||||
; CHECK-NEXT: mov x21, x8
|
||||
; CHECK-NEXT: cmp x9, x19
|
||||
; CHECK-NEXT: b.ge .LBB4_2
|
||||
; CHECK-NEXT: .LBB4_6: // %LJ
|
||||
; CHECK-NEXT: // Parent Loop BB4_3 Depth=1
|
||||
|
||||
@@ -9,8 +9,7 @@ define void @test_simplify_scev_during_expansion_flags(i64 %start) {
|
||||
; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
; CHECK-NEXT: [[START_NEG:%.*]] = sub i64 0, [[START]]
|
||||
; CHECK-NEXT: [[START_MUL:%.*]] = ashr exact i64 [[START_NEG]], 2
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[START_MUL]], 2
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 404, [[TMP0]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 404, [[START_NEG]]
|
||||
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 [[TMP1]], i1 false)
|
||||
; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||
; CHECK: [[LOOP]]:
|
||||
|
||||
@@ -22,10 +22,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
|
||||
; DEFAULT: preheader:
|
||||
; DEFAULT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
|
||||
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
|
||||
; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
|
||||
; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
|
||||
; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
|
||||
; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
|
||||
; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
|
||||
; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
|
||||
; DEFAULT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
|
||||
; DEFAULT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
|
||||
; DEFAULT-NEXT: br label [[INNER_LOOP:%.*]]
|
||||
@@ -87,10 +86,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
|
||||
; LIMIT: preheader:
|
||||
; LIMIT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
|
||||
; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
|
||||
; LIMIT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
|
||||
; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
|
||||
; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
|
||||
; LIMIT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
|
||||
; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
|
||||
; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
|
||||
; LIMIT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
|
||||
; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
|
||||
; LIMIT-NEXT: br label [[INNER_LOOP:%.*]]
|
||||
|
||||
@@ -20,8 +20,7 @@ define i32 @test(i1 %c.1, ptr %src) {
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i1 [[P]], [[T]]
|
||||
; CHECK-NEXT: [[ZEXT_OR:%.*]] = zext i1 [[OR]] to i32
|
||||
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw i32 [[LSR_IV]], 1
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LSR_IV_NEXT]], -1
|
||||
; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[TMP0]], -1050
|
||||
; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[LSR_IV]], -1050
|
||||
; CHECK-NEXT: br i1 [[LOOP_HEADER_TERMCOND]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: [[ZEXT_OR_LCSSA:%.*]] = phi i32 [ [[ZEXT_OR]], [[LOOP_LATCH]] ]
|
||||
|
||||
@@ -16,9 +16,7 @@ define void @test1() {
|
||||
; CHECK-NEXT: [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
|
||||
; CHECK-NEXT: [[VAL5:%.*]] = ashr i32 undef, undef
|
||||
; CHECK-NEXT: [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[VAL]], 7
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL3]], 7
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[VAL]], 7
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[VAL5]], 7
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL6]], 3
|
||||
@@ -58,10 +56,8 @@ define void @test1() {
|
||||
; CHECK: bb20.bb15splitsplitsplit_crit_edge:
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[VAL]], 3
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 3
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP14]], [[TMP16]]
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[VAL5]], 3
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP17]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP14]], [[TMP18]]
|
||||
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[LSR_IV1]]
|
||||
; CHECK-NEXT: br label [[BB15SPLITSPLITSPLIT]]
|
||||
; CHECK: bb15splitsplitsplit:
|
||||
@@ -82,10 +78,8 @@ define void @test1() {
|
||||
; CHECK: bb26.bb15split_crit_edge:
|
||||
; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[VAL]], 5
|
||||
; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[VAL1]], [[VAL2]]
|
||||
; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 5
|
||||
; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP28]], [[TMP30]]
|
||||
; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[VAL5]], 5
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[TMP28]], [[TMP32]]
|
||||
; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], [[LSR_IV1]]
|
||||
; CHECK-NEXT: br label [[BB15SPLIT]]
|
||||
; CHECK: bb15split:
|
||||
|
||||
@@ -425,26 +425,11 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
|
||||
; CHECK-NEXT: [[PTR_START:%.*]] = inttoptr i64 [[X:%.*]] to ptr
|
||||
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[X]], 40
|
||||
; CHECK-NEXT: [[PTR_END:%.*]] = inttoptr i64 [[ADD]] to ptr
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ADD]], [[X]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[ADD]] to i2
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[X]] to i2
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = sub i2 [[TMP3]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i64
|
||||
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP6]], 0
|
||||
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
|
||||
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.main.loop.iter.check:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP2]], 16
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
|
||||
; CHECK: vector.ph:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4
|
||||
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]]
|
||||
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 0
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
@@ -458,16 +443,14 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
|
||||
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4
|
||||
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP10]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
|
||||
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
|
||||
; CHECK: vec.epilog.iter.check:
|
||||
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]]
|
||||
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]]
|
||||
; CHECK: vec.epilog.ph:
|
||||
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
|
||||
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 32
|
||||
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
|
||||
; CHECK: vec.epilog.vector.body:
|
||||
@@ -479,10 +462,9 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 8
|
||||
; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
|
||||
; CHECK: vec.epilog.middle.block:
|
||||
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], 8
|
||||
; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
|
||||
; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
|
||||
; CHECK: vec.epilog.scalar.ph:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[VECTOR_SCEVCHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ]
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ]
|
||||
; CHECK-NEXT: br label [[LOOP:%.*]]
|
||||
; CHECK: loop:
|
||||
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
|
||||
|
||||
@@ -1013,27 +1013,11 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
|
||||
; CHECK-NEXT: [[END:%.*]] = inttoptr i64 [[ADD]] to ptr
|
||||
; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr [[BASE]], i64 24
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ADD]], -48
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[PTRTOINT]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 768614336404564651
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
|
||||
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
|
||||
; CHECK: [[VECTOR_SCEVCHECK]]:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD]] to i3
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[PTRTOINT]] to i3
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = sub i3 [[TMP5]], [[TMP6]]
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = zext i3 [[TMP7]] to i64
|
||||
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP8]], 0
|
||||
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
|
||||
; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP4]], 8
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
|
||||
; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
|
||||
; CHECK: [[VECTOR_PH]]:
|
||||
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 8
|
||||
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[N_VEC]], 24
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP15]]
|
||||
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[START]], i64 0
|
||||
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
|
||||
; CHECK: [[VECTOR_BODY]]:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
|
||||
@@ -1045,17 +1029,15 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
|
||||
; CHECK-NEXT: [[TMP25:%.*]] = zext <8 x i8> [[BROADCAST_SPLAT]] to <8 x i64>
|
||||
; CHECK-NEXT: [[TMP14]] = add <8 x i64> [[TMP11]], [[TMP25]]
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
|
||||
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
|
||||
; CHECK: [[MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP14]])
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
|
||||
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
|
||||
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
|
||||
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
|
||||
; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
|
||||
; CHECK: [[VEC_EPILOG_PH]]:
|
||||
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP26]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
|
||||
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP26]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[START]], i64 96
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
|
||||
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
|
||||
@@ -1069,11 +1051,10 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
|
||||
; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
|
||||
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
|
||||
; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP22]])
|
||||
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP4]], 4
|
||||
; CHECK-NEXT: br i1 [[CMP_N6]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
|
||||
; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
|
||||
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP24]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_SCEVCHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
|
||||
; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP26]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ]
|
||||
; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP24]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
|
||||
; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP26]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
|
||||
; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||
; CHECK: [[LOOP]]:
|
||||
; CHECK-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], %[[LOOP]] ]
|
||||
|
||||
@@ -399,8 +399,6 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[G_16:%.*]] = zext i1 [[G]] to i16
|
||||
; CHECK-NEXT: [[G_64:%.*]] = zext i1 [[G]] to i64
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 15, [[G_64]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
|
||||
; CHECK-NEXT: br label [[VECTOR_SCEVCHECK:%.*]]
|
||||
; CHECK: vector.scevcheck:
|
||||
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
|
||||
|
||||
@@ -587,6 +587,8 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
|
||||
ScalarEvolution *CallerSE = GenSE;
|
||||
ValueMapT CallerGlobals = ValueMap;
|
||||
IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
|
||||
MapVector<const Loop *, const SCEV *> OutsideLoopIterationsCopy =
|
||||
OutsideLoopIterations;
|
||||
|
||||
// Get the analyses for the subfunction. ParallelLoopGenerator already create
|
||||
// DominatorTree and LoopInfo for us.
|
||||
@@ -648,6 +650,19 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
|
||||
}
|
||||
IDToValue[IteratorID] = IV;
|
||||
|
||||
// Also update OutsideLoopIterations to use values from the subfunction.
|
||||
// SCEVExpander may fold identity operations (e.g. x+0 -> x), returning the
|
||||
// original loop PHI instead of a new instruction. We need to remap these
|
||||
// values through NewValues so GenSE (now SubSE) doesn't operate on values
|
||||
// from the caller function.
|
||||
for (auto &[L, S] : OutsideLoopIterations) {
|
||||
if (auto *U = dyn_cast<SCEVUnknown>(S)) {
|
||||
Value *NewVal = NewValues.lookup(U->getValue());
|
||||
assert(NewVal && "must have a new value");
|
||||
OutsideLoopIterations[L] = GenSE->getUnknown(NewVal);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Check whether the maps now exclusively refer to SubFn values.
|
||||
for (auto &[OldVal, SubVal] : ValueMap) {
|
||||
@@ -680,14 +695,12 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
|
||||
GenSE = CallerSE;
|
||||
IDToValue = std::move(IDToValueCopy);
|
||||
ValueMap = std::move(CallerGlobals);
|
||||
OutsideLoopIterations = std::move(OutsideLoopIterationsCopy);
|
||||
ExprBuilder.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
|
||||
RegionGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
|
||||
BlockGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
|
||||
Builder.SetInsertPoint(AfterLoop);
|
||||
|
||||
for (const Loop *L : Loops)
|
||||
OutsideLoopIterations.erase(L);
|
||||
|
||||
isl_ast_node_free(For);
|
||||
isl_ast_expr_free(Iterator);
|
||||
isl_id_free(IteratorID);
|
||||
|
||||
Reference in New Issue
Block a user