Files
llvm-project/llvm/test/Transforms/LoopVectorize/predicated-multiple-exits.ll
David Sherwood ba91dd14b9 [LV][NFC] Remove unneeded LLVM intrinsic declarations (#190993)
We no longer need to declare LLVM intrinsics in .ll files as the
intrinsics are populated automatically in the module. Remove the
declarations from tests to reduce test noise and size.

This came from a suggestion on PR #190786.
2026-04-09 11:41:18 +01:00

854 lines
40 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -S < %s -p loop-vectorize -force-vector-width=4 | FileCheck %s
;
; Zero-initialized 64-byte arrays. The loops in this file index them with
; their induction variable and compare the loaded elements to decide whether
; to leave the loop early.
@A = global [64 x i8] zeroinitializer
@B = global [64 x i8] zeroinitializer
@C = global [64 x i8] zeroinitializer
@D = global [64 x i8] zeroinitializer
; Diamond inside a loop where each arm of the diamond has its own early exit.
; The header branches on the sign of A[iv]:
;   - block.a (A[iv] < 0) exits when A[iv] == B[iv]; live-out is zext(B[iv]).
;   - block.b (otherwise) exits when A[iv] == C[iv]; live-out is iv.
; The function returns 0 when the loop leaves through the latch instead.
define i64 @diamond_with_2_early_exits() {
; CHECK-LABEL: define i64 @diamond_with_2_early_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]]
; CHECK-NEXT: [[CMP_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_A]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP10]], i1 false)
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP16]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
; The sign of A[iv] selects which arm of the diamond runs.
%branch.cond = icmp slt i8 %l.A, 0
br i1 %branch.cond, label %block.a, label %block.b
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
; %ext is only used as the live-out value of this arm's exit.
%ext = zext i8 %l.B to i64
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %loop.latch
block.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; One incoming value per exiting block; the two early exits carry different
; kinds of live-out (a loaded value and the induction variable).
%retval = phi i64 [ %ext, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; Three mutually exclusive early exits selected by a two-level range test on
; A[iv]:
;   - A[iv] < -42        -> block.a, exits when A[iv] == B[iv], returns 1.
;   - -42 <= A[iv] < 42  -> block.b, exits when A[iv] == C[iv], returns 2.
;   - A[iv] >= 42        -> block.c, exits when A[iv] == D[iv], returns 3.
; The function returns 0 when the loop leaves through the latch instead.
define i64 @three_early_exits() {
; CHECK-LABEL: define i64 @three_early_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[CHECK_B:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42)
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-NEXT: [[COND_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[COND_A]], label %[[BLOCK_A:.*]], label %[[CHECK_B]]
; CHECK: [[CHECK_B]]:
; CHECK-NEXT: br i1 [[TMP20]], label %[[BLOCK_B:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false)
; CHECK-NEXT: [[CMP_B:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[BLOCK_B]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
; First range test: values below -42 go to block.a.
%cond.a = icmp slt i8 %l.A, -42
br i1 %cond.a, label %block.a, label %check.b
check.b:
; Second range test, only reached when the first one failed.
%cond.b = icmp slt i8 %l.A, 42
br i1 %cond.b, label %block.b, label %block.c
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %loop.latch
block.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %loop.latch
block.c:
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cmp.c = icmp eq i8 %l.A, %l.D
br i1 %cmp.c, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; A distinct constant per exiting block identifies which exit was taken.
%retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ]
ret i64 %retval
}
; Outer diamond on the sign of A[iv] whose true arm contains a second, nested
; diamond on the sign of B[iv]:
;   - block.a1 (A[iv] < 0 and B[iv] < 0) exits when A[iv] == B[iv], returns 1.
;   - block.a2 (A[iv] < 0 and B[iv] >= 0) exits when A[iv] == C[iv], returns 2.
;   - block.b  (A[iv] >= 0) exits when A[iv] == D[iv], returns 3.
; The two inner arms rejoin in join.a before reaching the latch. The function
; returns 0 when the loop leaves through the latch.
define i64 @nested_diamond_inner_exits() {
; CHECK-LABEL: define i64 @nested_diamond_inner_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_A2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-NEXT: [[INNER_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[INNER_COND]], label %[[BLOCK_A1:.*]], label %[[BLOCK_A2]]
; CHECK: [[BLOCK_A2]]:
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_LATCH:.*]]
; CHECK: [[BLOCK_A1]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false)
; CHECK-NEXT: [[CMP_A1:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[CMP_A1]], label %[[LOOP_END:.*]], label %[[JOIN_A:.*]]
; CHECK: [[JOIN_A]]:
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[BLOCK_B:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[BLOCK_B]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
; Outer diamond condition.
%outer.cond = icmp slt i8 %l.A, 0
br i1 %outer.cond, label %block.a, label %block.b
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
; Inner diamond condition; %l.B is also the comparison operand in block.a1.
%inner.cond = icmp slt i8 %l.B, 0
br i1 %inner.cond, label %block.a1, label %block.a2
block.a1:
%cmp.a1 = icmp eq i8 %l.A, %l.B
br i1 %cmp.a1, label %loop.end, label %join.a
block.a2:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.a2 = icmp eq i8 %l.A, %l.C
br i1 %cmp.a2, label %loop.end, label %join.a
join.a:
; Merge point of the inner diamond's non-exiting paths.
br label %loop.latch
block.b:
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cmp.b = icmp eq i8 %l.A, %l.D
br i1 %cmp.b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; A distinct constant per exiting block identifies which exit was taken.
%retval = phi i64 [ 1, %block.a1 ], [ 2, %block.a2 ], [ 3, %block.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; Three early exits chained one after another behind a single guard. When
; A[iv] < 0 the loop body runs block.a -> block.b -> block.c in sequence, and
; each block exits when A[iv] equals the element it loads (B, C and D,
; returning 1, 2 and 3 respectively); a block is only reached when the
; previous one did not exit. When A[iv] >= 0 the header branches straight to
; the latch. The function returns 0 when the loop leaves through the latch.
define i64 @chain_of_3_exits() {
; CHECK-LABEL: define i64 @chain_of_3_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_C:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> splat (i1 true), <4 x i1> [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP12]]
; CHECK-NEXT: [[CMP_B:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[BLOCK_C]]
; CHECK: [[BLOCK_C]]:
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false)
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP17]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[VECTOR_EARLY_EXIT_2]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
; Guard for the whole chain: the false edge skips all three exiting blocks.
%cond.a = icmp slt i8 %l.A, 0
br i1 %cond.a, label %block.a, label %loop.latch
block.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %block.b
block.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %block.c
block.c:
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cmp.c = icmp eq i8 %l.A, %l.D
br i1 %cmp.c, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; A distinct constant per exiting block identifies which exit was taken.
%retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ]
ret i64 %retval
}
; Two diamonds in sequence, each arm of each diamond with its own early exit
; (four early exits in total):
;   - First diamond, on the sign of A[iv]:
;       branch1.a exits when A[iv] == B[iv], returns 1.
;       branch1.b exits when A[iv] == C[iv], returns 2.
;     Both arms fall through to branch2 when they do not exit.
;   - Second diamond, on the sign of D[iv]:
;       branch2.a exits when A[iv] == D[iv], returns 3.
;       branch2.b exits when A[iv] != D[iv], returns 4.
; The function returns 0 when the loop leaves through the latch.
define i64 @four_exits_2x2_diamond() {
; CHECK-LABEL: define i64 @four_exits_2x2_diamond() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BRANCH1_A:.*]]
; CHECK: [[BRANCH1_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BRANCH2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD3]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP13]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x i1> [[TMP18]]
; CHECK-NEXT: [[CMP1A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP1A]], label %[[LOOP_END:.*]], label %[[BRANCH2]]
; CHECK: [[BRANCH2]]:
; CHECK-NEXT: br i1 [[TMP21]], label %[[BRANCH2_A:.*]], label %[[BRANCH1_A]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[BRANCH2_A]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP18]], i1 false)
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[BRANCH2_B:.*]]
; CHECK: [[BRANCH2_B]]:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_3]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 4, %[[VECTOR_EARLY_EXIT_2]] ], [ 3, %[[VECTOR_EARLY_EXIT_3]] ], [ 2, %[[VECTOR_EARLY_EXIT_0]] ], [ 1, %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[BRANCH2_A]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
; Condition of the first diamond.
%cond1 = icmp slt i8 %l.A, 0
br i1 %cond1, label %branch1.a, label %branch1.b
branch1.a:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp1a = icmp eq i8 %l.A, %l.B
br i1 %cmp1a, label %loop.end, label %branch2
branch1.b:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp1b = icmp eq i8 %l.A, %l.C
br i1 %cmp1b, label %loop.end, label %branch2
branch2:
; Merge of the first diamond and head of the second one.
%gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
%l.D = load i8, ptr %gep.D, align 1
%cond2 = icmp slt i8 %l.D, 0
br i1 %cond2, label %branch2.a, label %branch2.b
branch2.a:
%cmp2a = icmp eq i8 %l.A, %l.D
br i1 %cmp2a, label %loop.end, label %loop.latch
branch2.b:
; Note the inverted predicate (ne) compared with branch2.a (eq).
%cmp2b = icmp ne i8 %l.A, %l.D
br i1 %cmp2b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; A distinct constant per exiting block identifies which exit was taken.
%retval = phi i64 [ 1, %branch1.a ], [ 2, %branch1.b ], [ 3, %branch2.a ], [ 4, %branch2.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; Diamond where neither arm exits directly. Both arms only load a value
; (B[iv] on the left, C[iv] on the right) and branch to a common merge block.
; The merge block holds the single early exit: it compares A[iv] against a
; phi of the two loaded values, and the exit's live-out is a second phi of
; their zero-extended forms. The function returns 0 when the loop leaves
; through the latch.
define i64 @diamond_merge_then_exit_with_phi_liveout() {
; CHECK-LABEL: define i64 @diamond_merge_then_exit_with_phi_liveout() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[WIDE_LOAD2]], <4 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP9]], <4 x i64> [[TMP8]]
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[PREDPHI3]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%cond = icmp slt i8 %l.A, 0
br i1 %cond, label %left, label %right
left:
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%val.left = zext i8 %l.B to i64
br label %merge
right:
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%val.right = zext i8 %l.C to i64
br label %merge
merge:
; %val is the live-out of the early exit; %ld.for.cmp feeds the exit
; condition. Both depend on which arm of the diamond was taken.
%val = phi i64 [ %val.left, %left ], [ %val.right, %right ]
%ld.for.cmp = phi i8 [ %l.B, %left ], [ %l.C, %right ]
%cmp = icmp eq i8 %l.A, %ld.for.cmp
br i1 %cmp, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
%retval = phi i64 [ %val, %merge ], [ 0, %loop.latch ]
ret i64 %retval
}
; Diamond where the two exit conditions compare l.A against l.B and l.C
; respectively, with all three loads done unconditionally in the header.
; If l.B == l.C at runtime, both comparisons could be true for the same
; lane, but masking them with cond / NOT cond prevents both from firing
; simultaneously. Tests that the predication correctly disambiguates the
; exits. Both early exits return iv; the latch exit returns 0.
define i64 @diamond_exits_overlapping_conditions() {
; CHECK-LABEL: define i64 @diamond_exits_overlapping_conditions() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_B:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[COND]], label %[[BLOCK_A:.*]], label %[[BLOCK_B]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP9]], i1 false)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP14]], %[[LOOP_END]] ], [ 0, %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
; All three loads happen here, before the diamond, so neither arm performs
; a load of its own.
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cond = icmp slt i8 %l.A, 0
br i1 %cond, label %block.a, label %block.b
block.a:
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %loop.latch
block.b:
%cmp.b = icmp eq i8 %l.A, %l.C
br i1 %cmp.b, label %loop.end, label %loop.latch
loop.latch:
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; Both early exits carry the same live-out value (%iv).
%retval = phi i64 [ %iv, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ]
ret i64 %retval
}
; block.c is reachable both from an exit fall-through path and from a direct
; branch:
;  * loop.header branches to block.a when A[iv] is negative (signed) and
;    directly to block.c otherwise.
;  * block.a has an exit, taken when A[iv] == B[iv]; if it doesn't exit, it
;    falls through to block.c.
;  * block.c then has its own exit, taken when A[iv] == C[iv].
; The result is 1 for the block.a exit, 2 for the block.c exit and 0 when the
; latch ends the loop after 64 iterations.
; In the expected output below, the two exit masks are merged with a select,
; frozen and or-reduced to decide whether any lane exits; the first active
; lane (cttz.elts) then picks which of the two early-exit blocks is taken.
define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() {
; CHECK-LABEL: define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
; CHECK-NEXT: [[CMP_C:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_C]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; %iv starts at 0, is advanced by loop.latch, and indexes the i8 arrays.
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
%l.A = load i8, ptr %gep.A, align 1
; Negative A[iv] goes through block.a; everything else bypasses it.
%cond = icmp slt i8 %l.A, 0
br i1 %cond, label %block.a, label %block.c
block.a:
; First early exit: the load of B[iv] only happens on this conditional path.
%gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
%l.B = load i8, ptr %gep.B, align 1
%cmp.a = icmp eq i8 %l.A, %l.B
br i1 %cmp.a, label %loop.end, label %block.c
block.c:
; Second early exit: this block merges the bypass edge from loop.header with
; the no-exit fall-through edge from block.a.
%gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
%l.C = load i8, ptr %gep.C, align 1
%cmp.c = icmp eq i8 %l.A, %l.C
br i1 %cmp.c, label %loop.end, label %loop.latch
loop.latch:
; Counted exit: keep looping until %iv.next reaches 64.
%iv.next = add i64 %iv, 1
%exitcond = icmp ne i64 %iv.next, 64
br i1 %exitcond, label %loop.header, label %loop.end
loop.end:
; Distinct constants identify the exit taken: 1 = block.a, 2 = block.c,
; 0 = loop.latch.
%retval = phi i64 [ 1, %block.a ], [ 2, %block.c ], [ 0, %loop.latch ]
ret i64 %retval
}
; Diamond (then/else) inside the loop where the else arm has an exit to an
; unreachable block and both arms merge in loop.exiting, which has the exit to
; loop.end.
; When the else branch is speculatively executed for iv < 2, `sub nuw` wraps
; producing poison, so %trap.cond is poison for those iterations. This poison
; condition is processed first in RPO.
; %val is 10, 11, 1, 2 for iv = 0..3, all below 12, so the loop.exiting exit
; condition holds for every iteration; the expected output below reflects
; this with an all-true exit mask and an all-false mask for the trap exit.
; Test for https://github.com/llvm/llvm-project/issues/187061.
define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-LABEL: define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> splat (i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> splat (i1 true), i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 1, i32 2>, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; iv < 2 takes the then arm; iv >= 2 takes the else arm.
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp = icmp ult i32 %iv, 2
br i1 %cmp, label %then, label %else
then:
; No exit on this arm; it only produces the value merged in loop.exiting.
%lo.val = add i32 %iv, 10
br label %loop.exiting
else:
; iv - 2 is only wrap-free when iv >= 2; evaluating it for iv < 2 violates
; nuw and yields poison, which propagates into %shl and %trap.cond.
%sub = sub nuw i32 %iv, 2
%shl = shl nuw i32 1, %sub
; Exit with a possibly-poison condition, leading to the trap block.
%trap.cond = icmp eq i32 %shl, 999
br i1 %trap.cond, label %unreachable.exit, label %loop.exiting
unreachable.exit:
; Exit block that does not return.
call void @llvm.trap()
unreachable
loop.exiting:
; Merge point of the diamond; holds the early exit that yields %val.
%val = phi i32 [ %lo.val, %then ], [ %shl, %else ]
%found.cond = icmp ult i32 %val, 12
br i1 %found.cond, label %loop.end, label %loop.latch
loop.latch:
; Counted exit: stop once %iv.next reaches 4.
%iv.next = add nuw nsw i32 %iv, 1
%done = icmp eq i32 %iv.next, 4
br i1 %done, label %loop.end, label %loop.header
loop.end:
; %val when the early exit was taken, -1 when the latch ended the loop.
%retval = phi i32 [ %val, %loop.exiting ], [ -1, %loop.latch ]
ret i32 %retval
}
; Same as @diamond_exit_poison_from_speculated_branch, but the poison exit
; condition (trap.cond from the speculated else branch) comes second in RPO.
; The first processed exit is from the then branch (RPO-before else).
; Here the diamond does not merge before the latch: the then arm exits
; directly to loop.end, and the else arm exits to the trap block.
; %found.cond only holds for iv = 0 (10 < 11), which matches the
; <true, false, false, false> exit mask in the expected output below.
define i32 @diamond_exit_poison_cond_second() {
; CHECK-LABEL: define i32 @diamond_exit_poison_cond_second() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 true, i1 false, i1 false, i1 false>, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; iv < 2 takes the then arm; iv >= 2 takes the else arm.
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp = icmp ult i32 %iv, 2
br i1 %cmp, label %then, label %else
then:
; Well-defined exit condition; this arm exits straight to loop.end with %val.
%val = add i32 %iv, 10
%found.cond = icmp ult i32 %val, 11
br i1 %found.cond, label %loop.end, label %loop.latch
else:
; sub nuw produces poison when speculatively executed for iv < 2.
%sub = sub nuw i32 %iv, 2
%shl = shl nuw i32 1, %sub
; Exit condition derived from the possibly-poison %shl; leads to the trap block.
%trap.cond = icmp eq i32 %shl, 999
br i1 %trap.cond, label %unreachable.exit, label %loop.latch
unreachable.exit:
; Exit block that does not return.
call void @llvm.trap()
unreachable
loop.latch:
; Counted exit: stop once %iv.next reaches 4.
%iv.next = add nuw nsw i32 %iv, 1
%done = icmp eq i32 %iv.next, 4
br i1 %done, label %loop.end, label %loop.header
loop.end:
; %val when the then-arm exit was taken, -1 when the latch ended the loop.
%retval = phi i32 [ %val, %then ], [ -1, %loop.latch ]
ret i32 %retval
}