Moving these into the middle-end pipeline will allow for additional optimization of the expansion result, such as CSE of redundant loads (cf. https://godbolt.org/z/bEna4Md9r). For now, we conservatively place the passes at the end of the middle-end pipeline, so we mostly don't benefit from additional optimizations yet. The pipeline position will be moved in a future change. This builds on work done by legrosbuffle in https://reviews.llvm.org/D60318. --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
88 lines
3.2 KiB
LLVM
88 lines
3.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: opt -passes=expand-memcmp -mtriple=wasm32-unknown-unknown -mattr=+simd128 -S < %s | llc -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
|
|
; The pipeline above has two stages. opt runs only the expand-memcmp pass
; (wasm32 triple, simd128 enabled) and prints textual IR; llc then compiles
; that IR, again with simd128 enabled, and FileCheck verifies the assembly.
; llc is given no triple on the command line, so it relies on the
; "target triple" line below.
|
|
target triple = "wasm32-unknown-unknown"
|
|
|
|
; memcmp is only declared, never defined, in this file. Both memcmp tests
; below call it with a constant length of 16 and an i32 length operand.
declare i32 @memcmp(ptr, ptr, i32)
|
|
|
|
; Positive test. The IR calls memcmp on 16 bytes and uses the result only in
; an equality comparison against 0. The expected assembly contains no call;
; it reads each operand with a single v128.load (p2align=0), compares the
; two vectors with i8x16.eq and reduces the result with i8x16.all_true.
define i1 @setcc_load(ptr %a, ptr %b) {
|
|
; CHECK-LABEL: setcc_load:
|
|
; CHECK: .functype setcc_load (i32, i32) -> (i32)
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: v128.load $push1=, 0($0):p2align=0
|
|
; CHECK-NEXT: v128.load $push0=, 0($1):p2align=0
|
|
; CHECK-NEXT: i8x16.eq $push2=, $pop1, $pop0
|
|
; CHECK-NEXT: i8x16.all_true $push3=, $pop2
|
|
; CHECK-NEXT: return $pop3
|
|
%cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16)
|
|
%res = icmp eq i32 %cmp_16, 0
|
|
ret i1 %res
|
|
}
|
|
|
|
; INFO: Negative test: noimplicitfloat disables SIMD
; The body is the same as in setcc_load; the only difference is the
; noimplicitfloat attribute on the function. The expected assembly has no
; v128 instructions. It loads an i64 from each pointer at offsets 0 and 8,
; xors the values pairwise, ors the two xor results together and tests the
; combined value with i64.eqz.
|
|
define i1 @setcc_load_should_not_vectorize(ptr %a, ptr %b) noimplicitfloat {
|
|
; CHECK-LABEL: setcc_load_should_not_vectorize:
|
|
; CHECK: .functype setcc_load_should_not_vectorize (i32, i32) -> (i32)
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: i64.load $push4=, 0($0):p2align=0
|
|
; CHECK-NEXT: i64.load $push3=, 0($1):p2align=0
|
|
; CHECK-NEXT: i64.xor $push5=, $pop4, $pop3
|
|
; CHECK-NEXT: i64.load $push1=, 8($0):p2align=0
|
|
; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0
|
|
; CHECK-NEXT: i64.xor $push2=, $pop1, $pop0
|
|
; CHECK-NEXT: i64.or $push6=, $pop5, $pop2
|
|
; CHECK-NEXT: i64.eqz $push7=, $pop6
|
|
; CHECK-NEXT: return $pop7
|
|
%cmp_16 = call i32 @memcmp(ptr %a, ptr %b, i32 16)
|
|
%res = icmp eq i32 %cmp_16, 0
|
|
ret i1 %res
|
|
}
|
|
|
|
; Positive test without memcmp. An i128 is loaded and compared for equality
; with the constant 6. The expected assembly uses one v128.load, builds the
; constant with v128.const (operands 6, 0), compares with i8x16.eq and
; reduces the result with i8x16.all_true.
define i1 @setcc_eq_const_i128(ptr %ptr) {
|
|
; CHECK-LABEL: setcc_eq_const_i128:
|
|
; CHECK: .functype setcc_eq_const_i128 (i32) -> (i32)
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: v128.load $push0=, 0($0)
|
|
; CHECK-NEXT: v128.const $push1=, 6, 0
|
|
; CHECK-NEXT: i8x16.eq $push2=, $pop0, $pop1
|
|
; CHECK-NEXT: i8x16.all_true $push3=, $pop2
|
|
; CHECK-NEXT: return $pop3
|
|
%l = load i128, ptr %ptr
|
|
%res = icmp eq i128 %l, 6
|
|
ret i1 %res
|
|
}
|
|
|
|
; Inequality counterpart of setcc_eq_const_i128. An i128 is loaded and
; compared with the constant 16 using icmp ne. The expected assembly uses
; one v128.load and a v128.const (operands 16, 0), compares with i8x16.ne
; and reduces the result with v128.any_true rather than i8x16.all_true.
define i1 @setcc_ne_const_i128(ptr %ptr) {
|
|
; CHECK-LABEL: setcc_ne_const_i128:
|
|
; CHECK: .functype setcc_ne_const_i128 (i32) -> (i32)
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: v128.load $push0=, 0($0)
|
|
; CHECK-NEXT: v128.const $push1=, 16, 0
|
|
; CHECK-NEXT: i8x16.ne $push2=, $pop0, $pop1
|
|
; CHECK-NEXT: v128.any_true $push3=, $pop2
|
|
; CHECK-NEXT: return $pop3
|
|
%l = load i128, ptr %ptr
|
|
%res = icmp ne i128 %l, 16
|
|
ret i1 %res
|
|
}
|
|
|
|
; INFO: Negative test: only eq and ne work
; Here the i128 is compared with the constant 25 using the signed predicate
; slt. The expected assembly has no v128 instructions. It loads two i64
; values at offsets 0 and 8. The value at offset 0 is compared with 25 using
; i64.lt_u, and the value at offset 8 is compared with 0 using i64.lt_s.
; i32.select then takes both comparison results, with i64.eqz of the
; offset-8 value as its condition operand.
|
|
define i1 @setcc_slt_const_i128(ptr %ptr) {
|
|
; CHECK-LABEL: setcc_slt_const_i128:
|
|
; CHECK: .functype setcc_slt_const_i128 (i32) -> (i32)
|
|
; CHECK-NEXT: # %bb.0:
|
|
; CHECK-NEXT: i64.load $push2=, 0($0)
|
|
; CHECK-NEXT: i64.const $push3=, 25
|
|
; CHECK-NEXT: i64.lt_u $push4=, $pop2, $pop3
|
|
; CHECK-NEXT: i64.load $push8=, 8($0)
|
|
; CHECK-NEXT: local.tee $push7=, $1=, $pop8
|
|
; CHECK-NEXT: i64.const $push0=, 0
|
|
; CHECK-NEXT: i64.lt_s $push1=, $pop7, $pop0
|
|
; CHECK-NEXT: i64.eqz $push5=, $1
|
|
; CHECK-NEXT: i32.select $push6=, $pop4, $pop1, $pop5
|
|
; CHECK-NEXT: return $pop6
|
|
%l = load i128, ptr %ptr
|
|
%res = icmp slt i128 %l, 25
|
|
ret i1 %res
|
|
}
|