In some cases, LV gets simplifiable IR as input. Directly apply simplifications on the initial VPlan0 to avoid vectorization in cases where the loop body can be folded away. Using the end-to-end pipeline, this is relatively rare, but when reducing test cases, the reduction often ends up with cases with trivial folds. Rejecting those will result in more robust & realistic test cases. As follow-up, I also plan to add initial dead recipe removal. Depends on https://github.com/llvm/llvm-project/pull/176795. PR: https://github.com/llvm/llvm-project/pull/176828
46 lines
1.9 KiB
LLVM
46 lines
1.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s

; Select on a negated compare.
;
; The scalar loop runs %iv from 0 until %iv.next == 1024. Each iteration loads
; p[iv], compares it with 42, inverts the compare with an xor, and stores
; 42 or 43 back to p[iv] depending on the inverted condition.
;
; The expected output selects directly on the compare with the two select
; operands swapped (43, 42), so no xor remains in the vector body. The load
; from q[iv] has no users and is absent from the expected vector body as well.
; The middle block branches straight to the exit, i.e. no scalar remainder loop
; is expected.
define void @neg_cond(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: define void @neg_cond(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 42)
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 43), <4 x i32> splat (i32 42)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP0]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %iv = phi i32 [0, %entry], [%iv.next, %loop]
  %p.gep = getelementptr i32, ptr %p, i32 %iv
  %x = load i32, ptr %p.gep
  ; %y is never used below. NOTE(review): presumably kept on purpose as dead
  ; input for the vectorizer to drop; confirm before removing.
  %q.gep = getelementptr i32, ptr %q, i32 %iv
  %y = load i32, ptr %q.gep
  %cmp = icmp eq i32 %x, 42
  ; xor with i1 1 inverts %cmp; this is the negation the checks expect to be
  ; folded into the select by swapping its operands.
  %not = xor i1 %cmp, 1
  %sel = select i1 %not, i32 42, i32 43
  store i32 %sel, ptr %p.gep
  %iv.next = add i32 %iv, 1
  %done = icmp eq i32 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}