[X86] Add basic ISD::VECREDUCE_AND/OR/XOR handling (#195063)
Custom lower ISD::VECREDUCE_AND/OR/XOR using vector logic ops. Handling of any_of/all_of/parity patterns will happen later, once we start dismantling combinePredicateReduction().
This commit is contained in:
@@ -1159,6 +1159,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
|
||||
setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
|
||||
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
|
||||
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
|
||||
}
|
||||
|
||||
// SSE2 can use basic vector unrolling.
|
||||
@@ -1552,6 +1555,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::SRA, VT, Custom);
|
||||
setOperationAction(ISD::ABDS, VT, Custom);
|
||||
setOperationAction(ISD::ABDU, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
|
||||
if (VT == MVT::v4i64) continue;
|
||||
setOperationAction(ISD::ROTL, VT, Custom);
|
||||
setOperationAction(ISD::ROTR, VT, Custom);
|
||||
@@ -2021,6 +2027,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
setOperationAction(ISD::ABDS, VT, Custom);
|
||||
setOperationAction(ISD::ABDU, VT, Custom);
|
||||
setOperationAction(ISD::BITREVERSE, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
|
||||
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
|
||||
|
||||
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
|
||||
// setcc all the way to isel and prefer SETGT in some isel patterns.
|
||||
@@ -29648,6 +29657,43 @@ static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget,
|
||||
return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
|
||||
}
|
||||
|
||||
// Generic x86 vector reduction expansion.
//
// Lowers an ISD::VECREDUCE_* node (currently reached via VECREDUCE_AND/OR/XOR
// custom lowering) by repeatedly halving the vector and applying the
// reduction's base binop, finishing with a scalar extract of element 0.
// Returns SDValue() (i.e. "use default expansion") when the source doesn't
// match the pattern this expansion handles.
static SDValue LowerVECREDUCE(SDValue Op, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG) {
  // Map e.g. VECREDUCE_AND -> AND; the assert guards against accumulator-style
  // reduction opcodes that have no plain binop form.
  ISD::NodeType BinOp = ISD::getVecReduceBaseOpcode(Op.getOpcode());
  assert(DAG.getTargetLoweringInfo().isBinOp(BinOp) &&
         "Only binops expected to be used by reductions");

  EVT ExtractVT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT SrcSVT = SrcVT.getScalarType();
  SDLoc DL(Op);

  // Bail to default expansion unless the scalar result type matches the
  // element type and the vector is a whole multiple of 128 bits (so the
  // split/shuffle tree below stays legal-width at every step).
  if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
    return SDValue();

  // Split vector down to 128-bits, performing bin to lo/hi subvectors.
  // Each iteration halves the width: reduce(V) == reduce(binop(lo(V), hi(V))).
  while (SrcVT.getSizeInBits() > 128) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = splitVector(Src, DAG, DL);
    SrcVT = Lo.getValueType();
    Src = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
  }
  assert(SrcVT.is128BitVector() && "Unexpected value type");

  // Expand 128-bit shuffle tree + reduction binops.
  // Each step shuffles the upper NumElts/2 live lanes down to the bottom
  // (remaining mask lanes stay -1/undef) and folds them in with BinOp, so
  // after log2(NumSrcElts) steps element 0 holds the full reduction.
  unsigned NumSrcElts = SrcVT.getVectorNumElements();
  for (unsigned NumElts = NumSrcElts; NumElts != 1; NumElts /= 2) {
    SmallVector<int, 16> Mask(NumSrcElts, -1);
    std::iota(Mask.begin(), Mask.begin() + (NumElts / 2), NumElts / 2);
    SDValue Upper =
        DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), Mask);
    Src = DAG.getNode(BinOp, DL, SrcVT, Src, Upper);
  }
  // The reduced scalar now lives in lane 0.
  return DAG.getExtractVectorElt(DL, ExtractVT, Src, 0);
}
|
||||
|
||||
static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
@@ -34480,6 +34526,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::VECREDUCE_SMIN:
|
||||
case ISD::VECREDUCE_UMAX:
|
||||
case ISD::VECREDUCE_UMIN: return LowerMINMAX_REDUCE(Op, Subtarget, DAG);
|
||||
case ISD::VECREDUCE_AND:
|
||||
case ISD::VECREDUCE_OR:
|
||||
case ISD::VECREDUCE_XOR: return LowerVECREDUCE(Op, Subtarget, DAG);
|
||||
case ISD::FMINIMUM:
|
||||
case ISD::FMAXIMUM:
|
||||
case ISD::FMINIMUMNUM:
|
||||
|
||||
@@ -8,35 +8,30 @@
|
||||
define i32 @extract_last_active_v4i32(<4 x i32> %a, <4 x i1> %c) {
|
||||
; CHECK-LABEL: extract_last_active_v4i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
|
||||
; CHECK-NEXT: movd %xmm2, %eax
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm2, %ecx
|
||||
; CHECK-NEXT: movd %xmm1, %edx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
|
||||
; CHECK-NEXT: movd %xmm2, %esi
|
||||
; CHECK-NEXT: por %xmm1, %xmm2
|
||||
; CHECK-NEXT: pslld $31, %xmm1
|
||||
; CHECK-NEXT: psrad $31, %xmm1
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %edi
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
|
||||
; CHECK-NEXT: movd %xmm0, %r8d
|
||||
; CHECK-NEXT: cmpl %edi, %r8d
|
||||
; CHECK-NEXT: cmoval %r8d, %edi
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
|
||||
; CHECK-NEXT: movd %xmm0, %r8d
|
||||
; CHECK-NEXT: cmpl %r8d, %edi
|
||||
; CHECK-NEXT: cmovbel %r8d, %edi
|
||||
; CHECK-NEXT: orl %edx, %esi
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: orl %esi, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
|
||||
; CHECK-NEXT: por %xmm2, %xmm0
|
||||
; CHECK-NEXT: movd %xmm0, %ecx
|
||||
; CHECK-NEXT: andb $1, %cl
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpb $1, %cl
|
||||
; CHECK-NEXT: sbbl %eax, %eax
|
||||
; CHECK-NEXT: orl -24(%rsp,%rdi,4), %eax
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
|
||||
; CHECK-NEXT: movd %xmm0, %edx
|
||||
; CHECK-NEXT: cmpl %ecx, %edx
|
||||
; CHECK-NEXT: cmoval %edx, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
|
||||
; CHECK-NEXT: movd %xmm0, %edx
|
||||
; CHECK-NEXT: cmpl %edx, %ecx
|
||||
; CHECK-NEXT: cmovbel %edx, %ecx
|
||||
; CHECK-NEXT: orl -24(%rsp,%rcx,4), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 -1)
|
||||
ret i32 %res
|
||||
@@ -70,11 +65,10 @@ define i32 @extract_last_active_v2i32(<2 x i32> %a, <2 x i1> %c) {
|
||||
; CHECK-LABEL: extract_last_active_v2i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movq %xmm1, %rax
|
||||
; CHECK-NEXT: por %xmm1, %xmm2
|
||||
; CHECK-NEXT: psllq $63, %xmm1
|
||||
; CHECK-NEXT: movq %xmm2, %rcx
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: orl %eax, %ecx
|
||||
; CHECK-NEXT: movq %xmm2, %rcx
|
||||
; CHECK-NEXT: andb $1, %cl
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpb $1, %cl
|
||||
@@ -94,28 +88,31 @@ define i32 @extract_last_active_v2i32(<2 x i32> %a, <2 x i1> %c) {
|
||||
define i32 @extract_last_active_v3i32(<3 x i32> %a, <3 x i1> %c) {
|
||||
; CHECK-LABEL: extract_last_active_v3i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movd %edx, %xmm0
|
||||
; CHECK-NEXT: movd %esi, %xmm1
|
||||
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,1]
|
||||
; CHECK-NEXT: pslld $31, %xmm1
|
||||
; CHECK-NEXT: psrad $31, %xmm1
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; CHECK-NEXT: movd %edi, %xmm2
|
||||
; CHECK-NEXT: movdqa %xmm2, %xmm3
|
||||
; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
|
||||
; CHECK-NEXT: movd %edx, %xmm4
|
||||
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: pslld $31, %xmm3
|
||||
; CHECK-NEXT: psrad $31, %xmm3
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; CHECK-NEXT: movd %xmm0, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: cmpl %ecx, %eax
|
||||
; CHECK-NEXT: cmoval %eax, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
|
||||
; CHECK-NEXT: movd %xmm0, %eax
|
||||
; CHECK-NEXT: cmpl %eax, %ecx
|
||||
; CHECK-NEXT: cmovbel %eax, %ecx
|
||||
; CHECK-NEXT: orl %esi, %edi
|
||||
; CHECK-NEXT: orl %edx, %edi
|
||||
; CHECK-NEXT: andb $1, %dil
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpb $1, %dil
|
||||
; CHECK-NEXT: cmpl $0, %ecx
|
||||
; CHECK-NEXT: cmovbel %eax, %ecx
|
||||
; CHECK-NEXT: por %xmm4, %xmm2
|
||||
; CHECK-NEXT: por %xmm1, %xmm2
|
||||
; CHECK-NEXT: movd %xmm2, %edx
|
||||
; CHECK-NEXT: andb $1, %dl
|
||||
; CHECK-NEXT: cmpb $1, %dl
|
||||
; CHECK-NEXT: sbbl %eax, %eax
|
||||
; CHECK-NEXT: orl -24(%rsp,%rcx,4), %eax
|
||||
; CHECK-NEXT: retq
|
||||
@@ -127,21 +124,18 @@ define i32 @extract_last_active_v3i32(<3 x i32> %a, <3 x i1> %c) {
|
||||
define i32 @extract_last_active_v8i32(<8 x i32> %a, <8 x i1> %c) {
|
||||
; CHECK-LABEL: extract_last_active_v8i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
|
||||
; CHECK-NEXT: por %xmm2, %xmm3
|
||||
; CHECK-NEXT: psllw $15, %xmm2
|
||||
; CHECK-NEXT: psraw $15, %xmm2
|
||||
; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
|
||||
; CHECK-NEXT: psubusw %xmm2, %xmm0
|
||||
; CHECK-NEXT: paddw %xmm2, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
|
||||
; CHECK-NEXT: psubusw %xmm0, %xmm1
|
||||
; CHECK-NEXT: paddw %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrld $16, %xmm0
|
||||
; CHECK-NEXT: psubusw %xmm1, %xmm0
|
||||
; CHECK-NEXT: paddw %xmm1, %xmm0
|
||||
; CHECK-NEXT: movd %xmm0, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
|
||||
; CHECK-NEXT: por %xmm3, %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-NEXT: psrld $16, %xmm1
|
||||
; CHECK-NEXT: por %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %ecx
|
||||
; CHECK-NEXT: andb $1, %cl
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpb $1, %cl
|
||||
@@ -169,31 +163,18 @@ define i32 @extract_last_active_v8i32(<8 x i32> %a, <8 x i1> %c) {
|
||||
define i32 @extract_last_active_v16i32(<16 x i32> %a, <16 x i1> %c) {
|
||||
; CHECK-LABEL: extract_last_active_v16i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
|
||||
; CHECK-NEXT: por %xmm4, %xmm5
|
||||
; CHECK-NEXT: psllw $7, %xmm4
|
||||
; CHECK-NEXT: pxor %xmm5, %xmm5
|
||||
; CHECK-NEXT: pcmpgtb %xmm4, %xmm5
|
||||
; CHECK-NEXT: pxor %xmm6, %xmm6
|
||||
; CHECK-NEXT: pcmpgtb %xmm4, %xmm6
|
||||
; CHECK-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
|
||||
; CHECK-NEXT: pmaxub %xmm5, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrld $16, %xmm0
|
||||
; CHECK-NEXT: pmaxub %xmm1, %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-NEXT: psrlw $8, %xmm1
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %ecx
|
||||
; CHECK-NEXT: andb $1, %cl
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpb $1, %cl
|
||||
; CHECK-NEXT: sbbl %eax, %eax
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm5[2,3,2,3]
|
||||
; CHECK-NEXT: pmaxub %xmm5, %xmm0
|
||||
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm6[2,3,2,3]
|
||||
; CHECK-NEXT: pmaxub %xmm6, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
@@ -204,6 +185,19 @@ define i32 @extract_last_active_v16i32(<16 x i32> %a, <16 x i1> %c) {
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %ecx
|
||||
; CHECK-NEXT: andl $15, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,1,1]
|
||||
; CHECK-NEXT: por %xmm5, %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-NEXT: psrld $16, %xmm1
|
||||
; CHECK-NEXT: por %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrlw $8, %xmm0
|
||||
; CHECK-NEXT: por %xmm1, %xmm0
|
||||
; CHECK-NEXT: movd %xmm0, %edx
|
||||
; CHECK-NEXT: andb $1, %dl
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: cmpb $1, %dl
|
||||
; CHECK-NEXT: sbbl %eax, %eax
|
||||
; CHECK-NEXT: orl -72(%rsp,%rcx,4), %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i32 @llvm.experimental.vector.extract.last.active.v16i32(<16 x i32> %a, <16 x i1> %c, i32 -1)
|
||||
@@ -239,9 +233,6 @@ define i8 @extract_last_active_split(<32 x i8> %data, <32 x i8> %mask, i8 %passt
|
||||
; CHECK-NEXT: por %xmm3, %xmm4
|
||||
; CHECK-NEXT: pxor %xmm5, %xmm5
|
||||
; CHECK-NEXT: pcmpeqb %xmm5, %xmm3
|
||||
; CHECK-NEXT: pcmpeqd %xmm6, %xmm6
|
||||
; CHECK-NEXT: movdqa %xmm3, %xmm7
|
||||
; CHECK-NEXT: pxor %xmm6, %xmm7
|
||||
; CHECK-NEXT: pcmpeqb %xmm5, %xmm2
|
||||
; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
|
||||
; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
|
||||
@@ -258,6 +249,7 @@ define i8 @extract_last_active_split(<32 x i8> %data, <32 x i8> %mask, i8 %passt
|
||||
; CHECK-NEXT: psrlw $8, %xmm2
|
||||
; CHECK-NEXT: pmaxub %xmm1, %xmm2
|
||||
; CHECK-NEXT: movd %xmm2, %eax
|
||||
; CHECK-NEXT: pmovmskb %xmm3, %ecx
|
||||
; CHECK-NEXT: pandn %xmm0, %xmm3
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
|
||||
; CHECK-NEXT: pmaxub %xmm3, %xmm0
|
||||
@@ -269,37 +261,15 @@ define i8 @extract_last_active_split(<32 x i8> %data, <32 x i8> %mask, i8 %passt
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-NEXT: psrlw $8, %xmm1
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %ecx
|
||||
; CHECK-NEXT: addl $16, %ecx
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm7[2,3,2,3]
|
||||
; CHECK-NEXT: pmaxub %xmm7, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrld $16, %xmm0
|
||||
; CHECK-NEXT: pmaxub %xmm1, %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-NEXT: psrlw $8, %xmm1
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %edx
|
||||
; CHECK-NEXT: testb $1, %dl
|
||||
; CHECK-NEXT: cmoveq %rax, %rcx
|
||||
; CHECK-NEXT: andl $31, %ecx
|
||||
; CHECK-NEXT: movzbl -40(%rsp,%rcx), %eax
|
||||
; CHECK-NEXT: addl $16, %edx
|
||||
; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
|
||||
; CHECK-NEXT: cmoveq %rax, %rdx
|
||||
; CHECK-NEXT: andl $31, %edx
|
||||
; CHECK-NEXT: movzbl -40(%rsp,%rdx), %eax
|
||||
; CHECK-NEXT: pcmpeqb %xmm5, %xmm4
|
||||
; CHECK-NEXT: pxor %xmm6, %xmm4
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
|
||||
; CHECK-NEXT: pmaxub %xmm4, %xmm0
|
||||
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movdqa %xmm1, %xmm0
|
||||
; CHECK-NEXT: psrld $16, %xmm0
|
||||
; CHECK-NEXT: pmaxub %xmm1, %xmm0
|
||||
; CHECK-NEXT: movdqa %xmm0, %xmm1
|
||||
; CHECK-NEXT: psrlw $8, %xmm1
|
||||
; CHECK-NEXT: pmaxub %xmm0, %xmm1
|
||||
; CHECK-NEXT: movd %xmm1, %ecx
|
||||
; CHECK-NEXT: testb $1, %cl
|
||||
; CHECK-NEXT: pmovmskb %xmm4, %ecx
|
||||
; CHECK-NEXT: cmpl $65535, %ecx # imm = 0xFFFF
|
||||
; CHECK-NEXT: cmovel %edi, %eax
|
||||
; CHECK-NEXT: # kill: def $al killed $al killed $eax
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
@@ -1146,56 +1146,35 @@ define zeroext i1 @PR44781(ptr %0) {
|
||||
}
|
||||
|
||||
define i32 @mask_v3i1(<3 x i32> %a, <3 x i32> %b) {
|
||||
; SSE2-LABEL: mask_v3i1:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE2-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE2-NEXT: movd %xmm1, %eax
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
|
||||
; SSE2-NEXT: movd %xmm0, %ecx
|
||||
; SSE2-NEXT: orl %eax, %ecx
|
||||
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE2-NEXT: movd %xmm0, %eax
|
||||
; SSE2-NEXT: orl %ecx, %eax
|
||||
; SSE2-NEXT: testb $1, %al
|
||||
; SSE2-NEXT: je .LBB30_2
|
||||
; SSE2-NEXT: # %bb.1:
|
||||
; SSE2-NEXT: xorl %eax, %eax
|
||||
; SSE2-NEXT: retq
|
||||
; SSE2-NEXT: .LBB30_2:
|
||||
; SSE2-NEXT: movl $1, %eax
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE41-LABEL: mask_v3i1:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE41-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE41-NEXT: pextrd $1, %xmm1, %eax
|
||||
; SSE41-NEXT: movd %xmm1, %ecx
|
||||
; SSE41-NEXT: orl %eax, %ecx
|
||||
; SSE41-NEXT: pextrd $2, %xmm1, %eax
|
||||
; SSE41-NEXT: orl %ecx, %eax
|
||||
; SSE41-NEXT: testb $1, %al
|
||||
; SSE41-NEXT: je .LBB30_2
|
||||
; SSE41-NEXT: # %bb.1:
|
||||
; SSE41-NEXT: xorl %eax, %eax
|
||||
; SSE41-NEXT: retq
|
||||
; SSE41-NEXT: .LBB30_2:
|
||||
; SSE41-NEXT: movl $1, %eax
|
||||
; SSE41-NEXT: retq
|
||||
; SSE-LABEL: mask_v3i1:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pcmpeqd %xmm1, %xmm0
|
||||
; SSE-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; SSE-NEXT: pxor %xmm0, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
|
||||
; SSE-NEXT: por %xmm1, %xmm0
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
|
||||
; SSE-NEXT: por %xmm0, %xmm1
|
||||
; SSE-NEXT: movd %xmm1, %eax
|
||||
; SSE-NEXT: testb $1, %al
|
||||
; SSE-NEXT: je .LBB30_2
|
||||
; SSE-NEXT: # %bb.1:
|
||||
; SSE-NEXT: xorl %eax, %eax
|
||||
; SSE-NEXT: retq
|
||||
; SSE-NEXT: .LBB30_2:
|
||||
; SSE-NEXT: movl $1, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1OR2-LABEL: mask_v3i1:
|
||||
; AVX1OR2: # %bb.0:
|
||||
; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
|
||||
; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
|
||||
; AVX1OR2-NEXT: vpxor %xmm1, %xmm0, %xmm0
|
||||
; AVX1OR2-NEXT: vpextrd $1, %xmm0, %eax
|
||||
; AVX1OR2-NEXT: vmovd %xmm0, %ecx
|
||||
; AVX1OR2-NEXT: orl %eax, %ecx
|
||||
; AVX1OR2-NEXT: vpextrd $2, %xmm0, %eax
|
||||
; AVX1OR2-NEXT: orl %ecx, %eax
|
||||
; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
|
||||
; AVX1OR2-NEXT: vpor %xmm1, %xmm0, %xmm1
|
||||
; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
|
||||
; AVX1OR2-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX1OR2-NEXT: vmovd %xmm0, %eax
|
||||
; AVX1OR2-NEXT: testb $1, %al
|
||||
; AVX1OR2-NEXT: je .LBB30_2
|
||||
; AVX1OR2-NEXT: # %bb.1:
|
||||
|
||||
Reference in New Issue
Block a user