We no longer need to declare LLVM intrinsics in .ll files as the intrinsics are populated automatically in the module. Remove the declarations from tests to reduce test noise and size. This came from a suggestion on PR #190786.
854 lines
40 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -S < %s -p loop-vectorize -force-vector-width=4 | FileCheck %s
;
; Four zero-initialized 64-byte global arrays that the test loops load from.
@A = global [64 x i8] zeroinitializer
@B = global [64 x i8] zeroinitializer
@C = global [64 x i8] zeroinitializer
@D = global [64 x i8] zeroinitializer
|
|
|
|
; Diamond on the sign of A[iv] where each side has its own early exit:
;   - block.a (A[iv] < 0) exits when A[iv] == B[iv]; live-out is zext(B[iv]).
;   - block.b (otherwise) exits when A[iv] == C[iv]; live-out is %iv.
; Both sides fall through to the latch. The latch exit (after the induction
; variable reaches 64) returns 0.
define i64 @diamond_with_2_early_exits() {
; CHECK-LABEL: define i64 @diamond_with_2_early_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]]
; CHECK-NEXT: [[CMP_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_A]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP10]], i1 false)
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP16]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %branch.cond = icmp slt i8 %l.A, 0
  br i1 %branch.cond, label %block.a, label %block.b

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %ext = zext i8 %l.B to i64
  %cmp.a = icmp eq i8 %l.A, %l.B
  br i1 %cmp.a, label %loop.end, label %loop.latch

block.b:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cmp.b = icmp eq i8 %l.A, %l.C
  br i1 %cmp.b, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %ext, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Three-way dispatch on A[iv], each destination having its own early exit:
;   - block.a (A[iv] < -42) exits with 1 when A[iv] == B[iv].
;   - block.b (-42 <= A[iv] < 42) exits with 2 when A[iv] == C[iv].
;   - block.c (A[iv] >= 42) exits with 3 when A[iv] == D[iv].
; All three fall through to the latch; the latch exit returns 0.
define i64 @three_early_exits() {
; CHECK-LABEL: define i64 @three_early_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[CHECK_B:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42)
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42)
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-NEXT: [[COND_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[COND_A]], label %[[BLOCK_A:.*]], label %[[CHECK_B]]
; CHECK: [[CHECK_B]]:
; CHECK-NEXT: br i1 [[TMP20]], label %[[BLOCK_B:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false)
; CHECK-NEXT: [[CMP_B:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[BLOCK_B]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %cond.a = icmp slt i8 %l.A, -42
  br i1 %cond.a, label %block.a, label %check.b

check.b:
  %cond.b = icmp slt i8 %l.A, 42
  br i1 %cond.b, label %block.b, label %block.c

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %cmp.a = icmp eq i8 %l.A, %l.B
  br i1 %cmp.a, label %loop.end, label %loop.latch

block.b:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cmp.b = icmp eq i8 %l.A, %l.C
  br i1 %cmp.b, label %loop.end, label %loop.latch

block.c:
  %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
  %l.D = load i8, ptr %gep.D, align 1
  %cmp.c = icmp eq i8 %l.A, %l.D
  br i1 %cmp.c, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Outer diamond on the sign of A[iv] with a second diamond nested in its
; taken side:
;   - block.a (A[iv] < 0) branches on the sign of B[iv]:
;       - block.a1 (B[iv] < 0) exits with 1 when A[iv] == B[iv].
;       - block.a2 (otherwise) exits with 2 when A[iv] == C[iv].
;     Both inner sides rejoin at join.a before reaching the latch.
;   - block.b (A[iv] >= 0) exits with 3 when A[iv] == D[iv].
; The latch exit returns 0.
define i64 @nested_diamond_inner_exits() {
; CHECK-LABEL: define i64 @nested_diamond_inner_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_A2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]]
; CHECK-NEXT: [[INNER_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[INNER_COND]], label %[[BLOCK_A1:.*]], label %[[BLOCK_A2]]
; CHECK: [[BLOCK_A2]]:
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_LATCH:.*]]
; CHECK: [[BLOCK_A1]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false)
; CHECK-NEXT: [[CMP_A1:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[CMP_A1]], label %[[LOOP_END:.*]], label %[[JOIN_A:.*]]
; CHECK: [[JOIN_A]]:
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[BLOCK_B:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[BLOCK_B]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %outer.cond = icmp slt i8 %l.A, 0
  br i1 %outer.cond, label %block.a, label %block.b

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %inner.cond = icmp slt i8 %l.B, 0
  br i1 %inner.cond, label %block.a1, label %block.a2

block.a1:
  %cmp.a1 = icmp eq i8 %l.A, %l.B
  br i1 %cmp.a1, label %loop.end, label %join.a

block.a2:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cmp.a2 = icmp eq i8 %l.A, %l.C
  br i1 %cmp.a2, label %loop.end, label %join.a

join.a:
  br label %loop.latch

block.b:
  %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
  %l.D = load i8, ptr %gep.D, align 1
  %cmp.b = icmp eq i8 %l.A, %l.D
  br i1 %cmp.b, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 1, %block.a1 ], [ 2, %block.a2 ], [ 3, %block.b ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Chain of three early exits guarded by a single header condition. When
; A[iv] < 0 the loop runs block.a -> block.b -> block.c in sequence, exiting
; with 1, 2 or 3 when A[iv] equals B[iv], C[iv] or D[iv] respectively.
; When A[iv] >= 0 the header branches straight to the latch. The latch exit
; returns 0.
define i64 @chain_of_3_exits() {
; CHECK-LABEL: define i64 @chain_of_3_exits() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_C:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> splat (i1 true), <4 x i1> [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP12]]
; CHECK-NEXT: [[CMP_B:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[BLOCK_C]]
; CHECK: [[BLOCK_C]]:
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false)
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP17]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[VECTOR_EARLY_EXIT_2]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %cond.a = icmp slt i8 %l.A, 0
  br i1 %cond.a, label %block.a, label %loop.latch

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %cmp.a = icmp eq i8 %l.A, %l.B
  br i1 %cmp.a, label %loop.end, label %block.b

block.b:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cmp.b = icmp eq i8 %l.A, %l.C
  br i1 %cmp.b, label %loop.end, label %block.c

block.c:
  %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
  %l.D = load i8, ptr %gep.D, align 1
  %cmp.c = icmp eq i8 %l.A, %l.D
  br i1 %cmp.c, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Two consecutive diamonds, each side of each diamond having an early exit:
;   - First diamond, on the sign of A[iv]:
;       - branch1.a (A[iv] < 0) exits with 1 when A[iv] == B[iv].
;       - branch1.b (otherwise) exits with 2 when A[iv] == C[iv].
;     Both sides merge at branch2.
;   - Second diamond, on the sign of D[iv]:
;       - branch2.a (D[iv] < 0) exits with 3 when A[iv] == D[iv].
;       - branch2.b (otherwise) exits with 4 when A[iv] != D[iv].
; The latch exit returns 0.
define i64 @four_exits_2x2_diamond() {
; CHECK-LABEL: define i64 @four_exits_2x2_diamond() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BRANCH1_A:.*]]
; CHECK: [[BRANCH1_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BRANCH2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD3]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]]
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP13]]
; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]]
; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x i1> [[TMP18]]
; CHECK-NEXT: [[CMP1A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP1A]], label %[[LOOP_END:.*]], label %[[BRANCH2]]
; CHECK: [[BRANCH2]]:
; CHECK-NEXT: br i1 [[TMP21]], label %[[BRANCH2_A:.*]], label %[[BRANCH1_A]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[BRANCH2_A]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP18]], i1 false)
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[BRANCH2_B:.*]]
; CHECK: [[BRANCH2_B]]:
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP23]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[LOOP_LATCH:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_3]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_2]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 4, %[[VECTOR_EARLY_EXIT_2]] ], [ 3, %[[VECTOR_EARLY_EXIT_3]] ], [ 2, %[[VECTOR_EARLY_EXIT_0]] ], [ 1, %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[BRANCH2_A]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %cond1 = icmp slt i8 %l.A, 0
  br i1 %cond1, label %branch1.a, label %branch1.b

branch1.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %cmp1a = icmp eq i8 %l.A, %l.B
  br i1 %cmp1a, label %loop.end, label %branch2

branch1.b:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cmp1b = icmp eq i8 %l.A, %l.C
  br i1 %cmp1b, label %loop.end, label %branch2

branch2:
  %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv
  %l.D = load i8, ptr %gep.D, align 1
  %cond2 = icmp slt i8 %l.D, 0
  br i1 %cond2, label %branch2.a, label %branch2.b

branch2.a:
  %cmp2a = icmp eq i8 %l.A, %l.D
  br i1 %cmp2a, label %loop.end, label %loop.latch

branch2.b:
  %cmp2b = icmp ne i8 %l.A, %l.D
  br i1 %cmp2b, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 1, %branch1.a ], [ 2, %branch1.b ], [ 3, %branch2.a ], [ 4, %branch2.b ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Diamond where neither branch exits directly. Both sides of the diamond
; merge to a block that then has an exit with a phi-dependent live-out value.
; The left side (A[iv] < 0) loads B[iv] and the right side loads C[iv]; the
; merge block exits when A[iv] equals the byte chosen by the phi and returns
; that byte zero-extended to i64. The latch exit returns 0.
define i64 @diamond_merge_then_exit_with_phi_liveout() {
; CHECK-LABEL: define i64 @diamond_merge_then_exit_with_phi_liveout() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[WIDE_LOAD2]], <4 x i8> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64>
; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP9]], <4 x i64> [[TMP8]]
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false)
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[PREDPHI3]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %cond = icmp slt i8 %l.A, 0
  br i1 %cond, label %left, label %right

left:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %val.left = zext i8 %l.B to i64
  br label %merge

right:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %val.right = zext i8 %l.C to i64
  br label %merge

merge:
  %val = phi i64 [ %val.left, %left ], [ %val.right, %right ]
  %ld.for.cmp = phi i8 [ %l.B, %left ], [ %l.C, %right ]
  %cmp = icmp eq i8 %l.A, %ld.for.cmp
  br i1 %cmp, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %val, %merge ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Diamond where both exit conditions compare l.A against l.B and l.C
; respectively. If l.B == l.C at runtime, both conditions could be true
; for the same lane, but the masking with cond/NOT cond prevents both
; from firing simultaneously. Tests that the predication correctly
; disambiguates the exits.
; All three loads are done in the header, before the diamond. Both early
; exits return %iv; the latch exit returns 0.
define i64 @diamond_exits_overlapping_conditions() {
; CHECK-LABEL: define i64 @diamond_exits_overlapping_conditions() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_B:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]]
; CHECK-NEXT: [[COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[COND]], label %[[BLOCK_A:.*]], label %[[BLOCK_B]]
; CHECK: [[BLOCK_B]]:
; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP9]], i1 false)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP14]], %[[LOOP_END]] ], [ 0, %[[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cond = icmp slt i8 %l.A, 0
  br i1 %cond, label %block.a, label %block.b

block.a:
  %cmp.a = icmp eq i8 %l.A, %l.B
  br i1 %cmp.a, label %loop.end, label %loop.latch

block.b:
  %cmp.b = icmp eq i8 %l.A, %l.C
  br i1 %cmp.b, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ %iv, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; Block C is reachable from both an exit-fallthrough path and a direct branch.
; block.a has an exit; if it doesn't exit, it falls through to block.c.
; The header's false branch goes directly to block.c.
; block.c then has its own exit.
; The returned constant identifies the exit taken: 1 from block.a, 2 from
; block.c, and 0 when the latch exits after 64 iterations.
define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() {
; CHECK-LABEL: define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: br label %[[BLOCK_A:.*]]
; CHECK: [[BLOCK_A]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]]
; CHECK-NEXT: [[CMP_C:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT: br i1 [[CMP_C]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false)
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
  %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv
  %l.A = load i8, ptr %gep.A, align 1
  %cond = icmp slt i8 %l.A, 0
  ; Negative A[iv] goes through block.a first; otherwise bypass it.
  br i1 %cond, label %block.a, label %block.c

block.a:
  %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv
  %l.B = load i8, ptr %gep.B, align 1
  %cmp.a = icmp eq i8 %l.A, %l.B
  ; First early exit; the non-exiting path falls through to block.c.
  br i1 %cmp.a, label %loop.end, label %block.c

block.c:
  %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv
  %l.C = load i8, ptr %gep.C, align 1
  %cmp.c = icmp eq i8 %l.A, %l.C
  ; Second early exit, reached from both the header and block.a.
  br i1 %cmp.c, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add i64 %iv, 1
  %exitcond = icmp ne i64 %iv.next, 64
  br i1 %exitcond, label %loop.header, label %loop.end

loop.end:
  %retval = phi i64 [ 1, %block.a ], [ 2, %block.c ], [ 0, %loop.latch ]
  ret i64 %retval
}
|
|
|
|
; When the else branch is speculatively executed for iv < 2, `sub nuw` wraps,
; producing poison. This poison condition is processed first in RPO.
; Test for https://github.com/llvm/llvm-project/issues/187061.
define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-LABEL: define i32 @diamond_exit_poison_from_speculated_branch() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> splat (i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> splat (i1 true), i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 1, i32 2>, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ult i32 %iv, 2
  br i1 %cmp, label %then, label %else

then:
  %lo.val = add i32 %iv, 10
  br label %loop.exiting

else:
  ; sub nuw produces poison when speculatively executed for iv < 2.
  %sub = sub nuw i32 %iv, 2
  %shl = shl nuw i32 1, %sub
  %trap.cond = icmp eq i32 %shl, 999
  ; Early exit whose condition depends on the possibly-poison %sub.
  br i1 %trap.cond, label %unreachable.exit, label %loop.exiting

unreachable.exit:
  call void @llvm.trap()
  unreachable

loop.exiting:
  %val = phi i32 [ %lo.val, %then ], [ %shl, %else ]
  %found.cond = icmp ult i32 %val, 12
  ; Second early exit, taken from the merge of the then/else diamond.
  br i1 %found.cond, label %loop.end, label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i32 %iv, 1
  %done = icmp eq i32 %iv.next, 4
  br i1 %done, label %loop.end, label %loop.header

loop.end:
  %retval = phi i32 [ %val, %loop.exiting ], [ -1, %loop.latch ]
  ret i32 %retval
}
|
|
|
|
; Same as above but the poison exit condition (trap.cond from the speculated
; else branch) comes second in RPO. The first processed exit is from
; the then branch (RPO-before else).
define i32 @diamond_exit_poison_cond_second() {
; CHECK-LABEL: define i32 @diamond_exit_poison_cond_second() {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]])
; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 true, i1 false, i1 false, i1 false>, i1 false)
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]]
; CHECK: [[VECTOR_EARLY_EXIT_1]]:
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1]]
; CHECK: [[VECTOR_EARLY_EXIT_0]]:
; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]]
; CHECK: [[UNREACHABLE_EXIT]]:
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: unreachable
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ]
; CHECK-NEXT: ret i32 [[RETVAL]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ult i32 %iv, 2
  br i1 %cmp, label %then, label %else

then:
  %val = add i32 %iv, 10
  %found.cond = icmp ult i32 %val, 11
  ; Early exit directly from the then branch; its condition is poison-free.
  br i1 %found.cond, label %loop.end, label %loop.latch

else:
  ; sub nuw produces poison when speculatively executed for iv < 2.
  %sub = sub nuw i32 %iv, 2
  %shl = shl nuw i32 1, %sub
  %trap.cond = icmp eq i32 %shl, 999
  ; Early exit whose condition depends on the possibly-poison %sub.
  br i1 %trap.cond, label %unreachable.exit, label %loop.latch

unreachable.exit:
  call void @llvm.trap()
  unreachable

loop.latch:
  %iv.next = add nuw nsw i32 %iv, 1
  %done = icmp eq i32 %iv.next, 4
  br i1 %done, label %loop.end, label %loop.header

loop.end:
  %retval = phi i32 [ %val, %then ], [ -1, %loop.latch ]
  ret i32 %retval
}
|
|
|