diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 93514025380f..c2d99a3d2b22 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32563,68 +32563,168 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
   AI->eraseFromParent();
 }
 
-static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
+static X86::CondCode matchSignedNewValueCC(const Instruction *I) {
   using namespace llvm::PatternMatch;
-  if (!AI->hasOneUse())
-    return false;
+  if (match(I->user_back(),
+            m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
+    return X86::COND_S;
+  if (match(I->user_back(),
+            m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
+    return X86::COND_NS;
+  return X86::COND_INVALID;
+}
 
+static X86::CondCode matchAddCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
   Value *Op = AI->getOperand(1);
   CmpPredicate Pred;
-  const Instruction *I = AI->user_back();
-  AtomicRMWInst::BinOp Opc = AI->getOperation();
-  if (Opc == AtomicRMWInst::Add) {
-    if (match(I, m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
-    if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value())))) {
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
-        return true;
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-        return true;
-    }
-    return false;
-  }
-  if (Opc == AtomicRMWInst::Sub) {
-    if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
-    if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op))))) {
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
-        return true;
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-        return true;
-    }
-    return false;
-  }
-  if ((Opc == AtomicRMWInst::Or &&
-       match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value())))) ||
-      (Opc == AtomicRMWInst::And &&
-       match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))))) {
-    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
-             Pred == CmpInst::ICMP_SLT;
-    if (match(I->user_back(),
-              m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-      return true;
-    return false;
-  }
-  if (Opc == AtomicRMWInst::Xor) {
-    if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
-    if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value())))) {
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
-        return true;
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-        return true;
-    }
-    return false;
+
+  // Folded from icmp eq/ne (old + Op), 0 to icmp eq/ne old, -Op.
+  if (match(I, m_c_ICmp(Pred, m_Neg(m_Specific(Op)), m_Value()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
   }
-  return false;
+
+  // Non-folded SF form: %new = add %old, Op; icmp slt/sgt %new, 0/-1
+  // lock add sets SF on the new value directly.
+  if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value()))))
+    return matchSignedNewValueCC(I);
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchSubCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Folded from icmp eq/ne (old - Op), 0 to icmp eq/ne old, Op.
+  if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+  }
+
+  // Non-folded SF form: %new = sub %old, Op; icmp slt/sgt %new, 0/-1
+  // lock sub sets SF on the new value directly.
+  if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op)))))
+    return matchSignedNewValueCC(I);
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchOrCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Non-folded form: %new = or %old, Op; icmp P %new, 0/-1
+  // lock or sets ZF/SF on the new value directly.
+  if (!match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))))
+    return X86::COND_INVALID;
+
+  if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+    if (Pred == CmpInst::ICMP_SLT)
+      return X86::COND_S;
+  }
+  if (match(I->user_back(),
+            m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
+    return X86::COND_NS;
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchAndCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Non-folded form: %new = and %old, Op; icmp P %new, 0/-1
+  // lock and sets ZF/SF on the new value directly.
+  if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value())))) {
+    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
+      if (Pred == CmpInst::ICMP_EQ)
+        return X86::COND_E;
+      if (Pred == CmpInst::ICMP_NE)
+        return X86::COND_NE;
+      if (Pred == CmpInst::ICMP_SLT)
+        return X86::COND_S;
+    }
+    if (match(I->user_back(),
+              m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
+      return X86::COND_NS;
+    return X86::COND_INVALID;
+  }
+
+  // If -C is a power of 2:
+  //   (old & C) == 0  <=>  old ult -C
+  //   (old & C) != 0  <=>  old ugt ~C
+  auto *CI = dyn_cast<ConstantInt>(Op);
+  if (!CI)
+    return X86::COND_INVALID;
+  const APInt &C = CI->getValue();
+  const APInt *K;
+  if (!(-C).isPowerOf2() ||
+      !match(I, m_c_ICmp(Pred, m_Specific(AI), m_APInt(K))))
+    return X86::COND_INVALID;
+  if (Pred == ICmpInst::ICMP_ULT && *K == -C)
+    return X86::COND_E;
+  if (Pred == ICmpInst::ICMP_UGT && *K == ~C)
+    return X86::COND_NE;
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchXorCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Folded from icmp eq/ne (old ^ Op), 0 to icmp eq/ne old, Op.
+  if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+  }
+
+  // Non-folded SF form: %new = xor %old, Op; icmp slt/sgt %new, 0/-1
+  // lock xor sets SF on the new value directly.
+  if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))))
+    return matchSignedNewValueCC(I);
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode getCmpArithCC(const AtomicRMWInst *AI) {
+  if (!AI->hasOneUse())
+    return X86::COND_INVALID;
+
+  const Instruction *I = AI->user_back();
+  switch (AI->getOperation()) {
+  case AtomicRMWInst::Add:
+    return matchAddCC(AI, I);
+  case AtomicRMWInst::Sub:
+    return matchSubCC(AI, I);
+  case AtomicRMWInst::Or:
+    return matchOrCC(AI, I);
+  case AtomicRMWInst::And:
+    return matchAndCC(AI, I);
+  case AtomicRMWInst::Xor:
+    return matchXorCC(AI, I);
+  default:
+    return X86::COND_INVALID;
+  }
+}
+
+static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
+  return getCmpArithCC(AI) != X86::COND_INVALID;
 }
 
 void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
@@ -32639,24 +32739,9 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
     assert(TempI->hasOneUse() && "Must have one use");
     ICI = cast<ICmpInst>(TempI->user_back());
   }
-  X86::CondCode CC = X86::COND_INVALID;
-  ICmpInst::Predicate Pred = ICI->getPredicate();
-  switch (Pred) {
-  default:
-    llvm_unreachable("Not supported Pred");
-  case CmpInst::ICMP_EQ:
-    CC = X86::COND_E;
-    break;
-  case CmpInst::ICMP_NE:
-    CC = X86::COND_NE;
-    break;
-  case CmpInst::ICMP_SLT:
-    CC = X86::COND_S;
-    break;
-  case CmpInst::ICMP_SGT:
-    CC = X86::COND_NS;
-    break;
-  }
+  X86::CondCode CC = getCmpArithCC(AI);
+  assert(CC != X86::COND_INVALID && "emitCmpArithAtomicRMWIntrinsic called "
+                                    "without a recognized pattern");
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
   switch (AI->getOperation()) {
   default:
diff --git a/llvm/test/CodeGen/X86/atomic-lock-and-setcc-folded.ll b/llvm/test/CodeGen/X86/atomic-lock-and-setcc-folded.ll
new file mode 100644
index 000000000000..b93d25ea0493
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-lock-and-setcc-folded.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; fold (old & -2) != 0.
+define i1 @lock_and_setne_folded(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_setne_folded:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lock andq $-2, (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -2 seq_cst
+  %r = icmp ugt i64 %old, 1
+  ret i1 %r
+}
+
+; fold (old & -2) == 0.
+define i1 @lock_and_sete_folded(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_sete_folded:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lock andq $-2, (%rdi)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -2 seq_cst
+  %r = icmp ult i64 %old, 2
+  ret i1 %r
+}
+
+; folds with C = -4.
+define i1 @lock_and_setne_folded_c4(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_setne_folded_c4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lock andq $-4, (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -4 seq_cst
+  %r = icmp ugt i64 %old, 3
+  ret i1 %r
+}
+
+; negative test: no fold when -C is not a power of 2.
+define i1 @lock_and_setne_not_folded(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_setne_not_folded:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    andq $-3, %rcx
+; CHECK-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; CHECK-NEXT:    jne .LBB3_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    cmpq $3, %rax
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -3 seq_cst
+  %r = icmp ugt i64 %old, 2
+  ret i1 %r
+}
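
A note for reviewers: the unsigned-compare identity that the new constant-mask
path in matchAndCC relies on is easy to sanity-check in isolation. The
standalone C++ sketch below is illustrative only, not part of the patch; the
constant and loop bound are arbitrary choices. It verifies that for a mask C
whose negation is a power of two, (old & C) == 0 is exactly old <u -C, and
(old & C) != 0 is exactly old >u ~C, which is why the folded icmp ult/ugt forms
can still be lowered to lock and plus sete/setne:

#include <cassert>
#include <cstdint>

int main() {
  // C = -4: the low two bits are clear, -C == 4 (a power of two), ~C == 3.
  const uint64_t C = static_cast<uint64_t>(-4);
  const uint64_t NegC = ~C + 1; // two's-complement negation, wraps safely
  for (uint64_t Old = 0; Old < 64; ++Old) {
    assert(((Old & C) == 0) == (Old < NegC)); // icmp ult %old, -C
    assert(((Old & C) != 0) == (Old > ~C));   // icmp ugt %old, ~C
  }
  return 0;
}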
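The "Folded from" comments in matchAddCC, matchSubCC and matchXorCC cover the
shape left behind once earlier folds (typically InstCombine) rewrite a compare
of the new value into a compare of the old value, which is all the atomicrmw
returns. A similar standalone sketch, again illustrative and not part of the
patch, checks those three identities under wrapping 64-bit arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  // A small signed window cast to u64 so the add case exercises wraparound.
  for (int64_t OldS = -8; OldS <= 8; ++OldS) {
    for (int64_t OpS = -8; OpS <= 8; ++OpS) {
      uint64_t Old = static_cast<uint64_t>(OldS);
      uint64_t Op = static_cast<uint64_t>(OpS);
      assert(((Old + Op) == 0) == (Old == 0 - Op)); // add: old == -Op
      assert(((Old - Op) == 0) == (Old == Op));     // sub: old ==  Op
      assert(((Old ^ Op) == 0) == (Old == Op));     // xor: old ==  Op
    }
  }
  return 0;
}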