DAGCombiner canonicalizes `and(vnot(x), sext(cmp))` into `vselect(cmp, vnot(x), zero)`. Without a dedicated ISel pattern, this falls through to the generic vselect-to-bitselect lowering, producing `v128.not` + `v128.bitselect`. This patch adds a higher-priority pattern that emits a single `v128.andnot` instead, saving two instructions.

The direct `and(vnot(x), y)` pattern is already handled by the existing `andnot` PatFrag. This patch covers the indirect path through DAGCombiner's `and(x, sext(cmp))` → `vselect(cmp, x, zero)` canonicalization, which obscures the `andnot` opportunity when `x` is `vnot(y)`.

Assisted-by: Claude (Anthropic)
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
704 lines
23 KiB
LLVM
704 lines
23 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test that vector selects of various varieties lower correctly: selects with
; a vector-of-i1 mask argument, selects fed by a vector compare, selects on a
; scalar condition, selects with a zero splat arm, and and(vnot(x), sext(cmp)).

target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
; Select with a <16 x i1> mask passed as an argument. The mask arrives as a
; v128, so each lane is widened to all-ones/all-zeros with shl + shr_s by 7
; before it feeds v128.bitselect.
define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vselect_v16i8:
; CHECK:         .functype vselect_v16i8 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    i32.const 7
; CHECK-NEXT:    i8x16.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %res
}

; Select whose mask comes from a vector compare (icmp slt). The i8x16.lt_s
; result feeds v128.bitselect directly, with no shl/shr_s pair.
define <16 x i8> @vselect_cmp_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vselect_cmp_v16i8:
; CHECK:         .functype vselect_cmp_v16i8 (v128, v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.lt_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt <16 x i8> %a, %b
  %res = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %res
}

; Scalar i1 (zeroext) condition choosing between two whole vectors: lowers to
; v128.select with the argument used directly as the condition.
define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: select_v16i8:
; CHECK:         .functype select_v16i8 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %res = select i1 %c, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %res
}

; Whole-vector select on a scalar compare: icmp slt %i, 0 is materialized as
; i32.const 0 + i32.lt_s and used as the v128.select condition.
define <16 x i8> @select_cmp_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: select_cmp_v16i8:
; CHECK:         .functype select_cmp_v16i8 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32.lt_s
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt i32 %i, 0
  %res = select i1 %c, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %res
}

; Whole-vector select on icmp ne %i, 0: no compare is emitted, %i itself is
; the v128.select condition.
define <16 x i8> @select_ne_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: select_ne_v16i8:
; CHECK:         .functype select_ne_v16i8 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp ne i32 %i, 0
  %res = select i1 %c, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %res
}

; Whole-vector select on icmp eq %i, 0: no compare is emitted; the vector
; operands are swapped (local 2 before local 1) and %i is the condition.
define <16 x i8> @select_eq_v16i8(i32 %i, <16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: select_eq_v16i8:
; CHECK:         .functype select_eq_v16i8 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp eq i32 %i, 0
  %res = select i1 %c, <16 x i8> %x, <16 x i8> %y
  ret <16 x i8> %res
}

; ==============================================================================
; 8 x i16
; ==============================================================================
; Select with an <8 x i1> mask passed as an argument. The mask arrives as a
; v128, so each lane is widened to all-ones/all-zeros with shl + shr_s by 15
; before it feeds v128.bitselect.
define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vselect_v8i16:
; CHECK:         .functype vselect_v8i16 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 15
; CHECK-NEXT:    i16x8.shl
; CHECK-NEXT:    i32.const 15
; CHECK-NEXT:    i16x8.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %res
}

; Select whose mask comes from a vector compare (icmp slt). The i16x8.lt_s
; result feeds v128.bitselect directly, with no shl/shr_s pair.
define <8 x i16> @vselect_cmp_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vselect_cmp_v8i16:
; CHECK:         .functype vselect_cmp_v8i16 (v128, v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i16x8.lt_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt <8 x i16> %a, %b
  %res = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %res
}

; Scalar i1 (zeroext) condition choosing between two whole vectors: lowers to
; v128.select with the argument used directly as the condition.
define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: select_v8i16:
; CHECK:         .functype select_v8i16 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %res = select i1 %c, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %res
}

; Whole-vector select on a scalar compare: icmp slt %i, 0 is materialized as
; i32.const 0 + i32.lt_s and used as the v128.select condition.
define <8 x i16> @select_cmp_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: select_cmp_v8i16:
; CHECK:         .functype select_cmp_v8i16 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32.lt_s
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt i32 %i, 0
  %res = select i1 %c, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %res
}

; Whole-vector select on icmp ne %i, 0: no compare is emitted, %i itself is
; the v128.select condition.
define <8 x i16> @select_ne_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: select_ne_v8i16:
; CHECK:         .functype select_ne_v8i16 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp ne i32 %i, 0
  %res = select i1 %c, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %res
}

; Whole-vector select on icmp eq %i, 0: no compare is emitted; the vector
; operands are swapped (local 2 before local 1) and %i is the condition.
define <8 x i16> @select_eq_v8i16(i32 %i, <8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: select_eq_v8i16:
; CHECK:         .functype select_eq_v8i16 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp eq i32 %i, 0
  %res = select i1 %c, <8 x i16> %x, <8 x i16> %y
  ret <8 x i16> %res
}

; ==============================================================================
; 4 x i32
; ==============================================================================
; Select with a <4 x i1> mask passed as an argument. The mask arrives as a
; v128, so each lane is widened to all-ones/all-zeros with shl + shr_s by 31
; before it feeds v128.bitselect.
define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vselect_v4i32:
; CHECK:         .functype vselect_v4i32 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shl
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %res
}

; Select whose mask comes from a vector compare (icmp slt). The i32x4.lt_s
; result feeds v128.bitselect directly, with no shl/shr_s pair.
define <4 x i32> @vselect_cmp_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vselect_cmp_v4i32:
; CHECK:         .functype vselect_cmp_v4i32 (v128, v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32x4.lt_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt <4 x i32> %a, %b
  %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %res
}

; Scalar i1 (zeroext) condition choosing between two whole vectors: lowers to
; v128.select with the argument used directly as the condition.
define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: select_v4i32:
; CHECK:         .functype select_v4i32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %res = select i1 %c, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %res
}

; Whole-vector select on a scalar compare: icmp slt %i, 0 is materialized as
; i32.const 0 + i32.lt_s and used as the v128.select condition.
define <4 x i32> @select_cmp_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: select_cmp_v4i32:
; CHECK:         .functype select_cmp_v4i32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32.lt_s
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt i32 %i, 0
  %res = select i1 %c, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %res
}

; Whole-vector select on icmp ne %i, 0: no compare is emitted, %i itself is
; the v128.select condition.
define <4 x i32> @select_ne_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: select_ne_v4i32:
; CHECK:         .functype select_ne_v4i32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp ne i32 %i, 0
  %res = select i1 %c, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %res
}

; Whole-vector select on icmp eq %i, 0: no compare is emitted; the vector
; operands are swapped (local 2 before local 1) and %i is the condition.
define <4 x i32> @select_eq_v4i32(i32 %i, <4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: select_eq_v4i32:
; CHECK:         .functype select_eq_v4i32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp eq i32 %i, 0
  %res = select i1 %c, <4 x i32> %x, <4 x i32> %y
  ret <4 x i32> %res
}

; ==============================================================================
; 2 x i64
; ==============================================================================
; Select with a <2 x i1> mask passed as an argument. The mask arrives as a
; v128, so each lane is widened to all-ones/all-zeros with shl + shr_s by 63
; before it feeds v128.bitselect.
define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: vselect_v2i64:
; CHECK:         .functype vselect_v2i64 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 63
; CHECK-NEXT:    i64x2.shl
; CHECK-NEXT:    i32.const 63
; CHECK-NEXT:    i64x2.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %res
}

; Select whose mask comes from a vector compare (icmp slt). The i64x2.lt_s
; result feeds v128.bitselect directly, with no shl/shr_s pair.
define <2 x i64> @vselect_cmp_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: vselect_cmp_v2i64:
; CHECK:         .functype vselect_cmp_v2i64 (v128, v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i64x2.lt_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt <2 x i64> %a, %b
  %res = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %res
}

; Scalar i1 (zeroext) condition choosing between two whole vectors: lowers to
; v128.select with the argument used directly as the condition.
define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: select_v2i64:
; CHECK:         .functype select_v2i64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %res = select i1 %c, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %res
}

; Whole-vector select on a scalar compare: icmp slt %i, 0 is materialized as
; i32.const 0 + i32.lt_s and used as the v128.select condition.
define <2 x i64> @select_cmp_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: select_cmp_v2i64:
; CHECK:         .functype select_cmp_v2i64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32.lt_s
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt i32 %i, 0
  %res = select i1 %c, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %res
}

; Whole-vector select on icmp ne %i, 0: no compare is emitted, %i itself is
; the v128.select condition.
define <2 x i64> @select_ne_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: select_ne_v2i64:
; CHECK:         .functype select_ne_v2i64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp ne i32 %i, 0
  %res = select i1 %c, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %res
}

; Whole-vector select on icmp eq %i, 0: no compare is emitted; the vector
; operands are swapped (local 2 before local 1) and %i is the condition.
define <2 x i64> @select_eq_v2i64(i32 %i, <2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: select_eq_v2i64:
; CHECK:         .functype select_eq_v2i64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp eq i32 %i, 0
  %res = select i1 %c, <2 x i64> %x, <2 x i64> %y
  ret <2 x i64> %res
}

; ==============================================================================
; 4 x float
; ==============================================================================
; Select between float vectors with a <4 x i1> mask passed as an argument. The
; mask lanes are widened to all-ones/all-zeros with integer i32x4 shl + shr_s
; by 31 before feeding v128.bitselect.
define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: vselect_v4f32:
; CHECK:         .functype vselect_v4f32 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shl
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
  ret <4 x float> %res
}

; Select whose mask comes from a float vector compare (fcmp olt). The f32x4.lt
; result feeds v128.bitselect directly, with no shl/shr_s pair.
define <4 x float> @vselect_cmp_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: vselect_cmp_v4f32:
; CHECK:         .functype vselect_cmp_v4f32 (v128, v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f32x4.lt
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %c = fcmp olt <4 x float> %a, %b
  %res = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
  ret <4 x float> %res
}

; Scalar i1 (zeroext) condition choosing between two whole vectors: lowers to
; v128.select with the argument used directly as the condition.
define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: select_v4f32:
; CHECK:         .functype select_v4f32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %res = select i1 %c, <4 x float> %x, <4 x float> %y
  ret <4 x float> %res
}

; Whole-vector select on a scalar compare: icmp slt %i, 0 is materialized as
; i32.const 0 + i32.lt_s and used as the v128.select condition.
define <4 x float> @select_cmp_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: select_cmp_v4f32:
; CHECK:         .functype select_cmp_v4f32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32.lt_s
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt i32 %i, 0
  %res = select i1 %c, <4 x float> %x, <4 x float> %y
  ret <4 x float> %res
}

; Whole-vector select on icmp ne %i, 0: no compare is emitted, %i itself is
; the v128.select condition.
define <4 x float> @select_ne_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: select_ne_v4f32:
; CHECK:         .functype select_ne_v4f32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp ne i32 %i, 0
  %res = select i1 %c, <4 x float> %x, <4 x float> %y
  ret <4 x float> %res
}

; Whole-vector select on icmp eq %i, 0: no compare is emitted; the vector
; operands are swapped (local 2 before local 1) and %i is the condition.
define <4 x float> @select_eq_v4f32(i32 %i, <4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: select_eq_v4f32:
; CHECK:         .functype select_eq_v4f32 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp eq i32 %i, 0
  %res = select i1 %c, <4 x float> %x, <4 x float> %y
  ret <4 x float> %res
}

; ==============================================================================
; 2 x double
; ==============================================================================
; Select between double vectors with a <2 x i1> mask passed as an argument.
; The mask lanes are widened to all-ones/all-zeros with integer i64x2
; shl + shr_s by 63 before feeding v128.bitselect.
define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: vselect_v2f64:
; CHECK:         .functype vselect_v2f64 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 63
; CHECK-NEXT:    i64x2.shl
; CHECK-NEXT:    i32.const 63
; CHECK-NEXT:    i64x2.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
}

; Select whose mask comes from a double vector compare (fcmp olt). The
; f64x2.lt result feeds v128.bitselect directly, with no shl/shr_s pair.
define <2 x double> @vselect_cmp_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: vselect_cmp_v2f64:
; CHECK:         .functype vselect_cmp_v2f64 (v128, v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    f64x2.lt
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %c = fcmp olt <2 x double> %a, %b
  %res = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
}

; Scalar i1 (zeroext) condition choosing between two whole vectors: lowers to
; v128.select with the argument used directly as the condition.
define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: select_v2f64:
; CHECK:         .functype select_v2f64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %res = select i1 %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
}

; Whole-vector select on a scalar compare: icmp slt %i, 0 is materialized as
; i32.const 0 + i32.lt_s and used as the v128.select condition.
define <2 x double> @select_cmp_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: select_cmp_v2f64:
; CHECK:         .functype select_cmp_v2f64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i32.lt_s
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp slt i32 %i, 0
  %res = select i1 %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
}

; Whole-vector select on icmp ne %i, 0: no compare is emitted, %i itself is
; the v128.select condition.
define <2 x double> @select_ne_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: select_ne_v2f64:
; CHECK:         .functype select_ne_v2f64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp ne i32 %i, 0
  %res = select i1 %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
}

; Whole-vector select on icmp eq %i, 0: no compare is emitted; the vector
; operands are swapped (local 2 before local 1) and %i is the condition.
define <2 x double> @select_eq_v2f64(i32 %i, <2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: select_eq_v2f64:
; CHECK:         .functype select_eq_v2f64 (i32, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.select
; CHECK-NEXT:    # fallthrough-return
  %c = icmp eq i32 %i, 0
  %res = select i1 %c, <2 x double> %x, <2 x double> %y
  ret <2 x double> %res
}

; select(icmp eq (x & 2139095040), 0), zero, x): with the zero splat in the
; true arm, the compare is inverted (i32x4.ne) and its result is ANDed with x;
; no v128.bitselect is emitted. 2139095040 is 0x7F800000.
define <4 x i32> @select_splat_first_zero_and_icmp(<4 x i32> %x) {
; CHECK-LABEL: select_splat_first_zero_and_icmp:
; CHECK:         .functype select_splat_first_zero_and_icmp (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    v128.const 0, 0, 0, 0
; CHECK-NEXT:    i32x4.ne
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    # fallthrough-return
  %a = and <4 x i32> %x, splat (i32 2139095040)
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %res = select <4 x i1> %c, <4 x i32> zeroinitializer, <4 x i32> %x
  ret <4 x i32> %res
}

; select(icmp eq (x & 2139095040), 0), x, zero): with the zero splat in the
; false arm, the i32x4.eq result is ANDed with x directly; no v128.bitselect
; is emitted. 2139095040 is 0x7F800000.
define <4 x i32> @select_splat_second_zero_and_icmp(<4 x i32> %x) {
; CHECK-LABEL: select_splat_second_zero_and_icmp:
; CHECK:         .functype select_splat_second_zero_and_icmp (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.const 2139095040, 2139095040, 2139095040, 2139095040
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    v128.const 0, 0, 0, 0
; CHECK-NEXT:    i32x4.eq
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    # fallthrough-return
  %a = and <4 x i32> %x, splat (i32 2139095040)
  %c = icmp eq <4 x i32> %a, zeroinitializer
  %res = select <4 x i1> %c, <4 x i32> zeroinitializer, <4 x i32> %x
  ret <4 x i32> %res
}

; select(c, zero, x) where the <4 x i1> mask is a function argument: the
; checked output materializes v128.const 0, widens the mask with shl + shr_s
; by 31, and uses v128.bitselect.
define <4 x i32> @select_splat_first_zero_cond_input(<4 x i1> %c, <4 x i32> %x) {
; CHECK-LABEL: select_splat_first_zero_cond_input:
; CHECK:         .functype select_splat_first_zero_cond_input (v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    v128.const 0, 0, 0, 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shl
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shr_s
; CHECK-NEXT:    v128.bitselect
; CHECK-NEXT:    # fallthrough-return
  %res = select <4 x i1> %c, <4 x i32> zeroinitializer, <4 x i32> %x
  ret <4 x i32> %res
}

; select(c, x, zero) where the <4 x i1> mask is a function argument: the mask
; is widened with shl + shr_s by 31 and ANDed with x; no v128.bitselect or
; zero constant is emitted.
define <4 x i32> @select_splat_second_zero_cond_input(<4 x i1> %c, <4 x i32> %x) {
; CHECK-LABEL: select_splat_second_zero_cond_input:
; CHECK:         .functype select_splat_second_zero_cond_input (v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shl
; CHECK-NEXT:    i32.const 31
; CHECK-NEXT:    i32x4.shr_s
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.and
; CHECK-NEXT:    # fallthrough-return
  %res = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; ==============================================================================
; vselect(cmp, vnot(x), zero) -> v128.andnot
; ==============================================================================

; Test that and(vnot(x), sext(cmp)) lowers to v128.andnot.
;
; DAGCombiner canonicalizes and(x, sext(cmp)) into vselect(cmp, x, zero).
; When x is vnot(y), this produces vselect(cmp, vnot(y), zero), which the
; generic vselect-to-bitselect pattern would lower as v128.not +
; v128.bitselect. A dedicated ISel pattern matches it as a single v128.andnot
; instead.

; and(not(%x), sext(icmp slt %a, %b)) on <4 x i32>: expected to lower to the
; i32x4.lt_s mask combined with %x by a single v128.andnot.
define <4 x i32> @andnot_sext_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %x) {
; CHECK-LABEL: andnot_sext_v4i32:
; CHECK:         .functype andnot_sext_v4i32 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32x4.lt_s
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    v128.andnot
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp slt <4 x i32> %a, %b
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %not = xor <4 x i32> %x, splat (i32 -1)
  %res = and <4 x i32> %not, %sext
  ret <4 x i32> %res
}

; and(not(%x), sext(icmp slt %a, %b)) on <16 x i8>: expected to lower to the
; i8x16.lt_s mask combined with %x by a single v128.andnot.
define <16 x i8> @andnot_sext_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %x) {
; CHECK-LABEL: andnot_sext_v16i8:
; CHECK:         .functype andnot_sext_v16i8 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.lt_s
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    v128.andnot
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp slt <16 x i8> %a, %b
  %sext = sext <16 x i1> %cmp to <16 x i8>
  %not = xor <16 x i8> %x, splat (i8 -1)
  %res = and <16 x i8> %not, %sext
  ret <16 x i8> %res
}

; and(not(%x), sext(icmp slt %a, %b)) on <8 x i16>: expected to lower to the
; i16x8.lt_s mask combined with %x by a single v128.andnot.
define <8 x i16> @andnot_sext_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %x) {
; CHECK-LABEL: andnot_sext_v8i16:
; CHECK:         .functype andnot_sext_v8i16 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i16x8.lt_s
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    v128.andnot
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp slt <8 x i16> %a, %b
  %sext = sext <8 x i1> %cmp to <8 x i16>
  %not = xor <8 x i16> %x, splat (i16 -1)
  %res = and <8 x i16> %not, %sext
  ret <8 x i16> %res
}

; and(not(%x), sext(icmp slt %a, %b)) on <2 x i64>: expected to lower to the
; i64x2.lt_s mask combined with %x by a single v128.andnot.
define <2 x i64> @andnot_sext_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %x) {
; CHECK-LABEL: andnot_sext_v2i64:
; CHECK:         .functype andnot_sext_v2i64 (v128, v128, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i64x2.lt_s
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    v128.andnot
; CHECK-NEXT:    # fallthrough-return
  %cmp = icmp slt <2 x i64> %a, %b
  %sext = sext <2 x i1> %cmp to <2 x i64>
  %not = xor <2 x i64> %x, splat (i64 -1)
  %res = and <2 x i64> %not, %sext
  ret <2 x i64> %res
}