[X86] lock opt ptr const inconsistencies (#185195)
Resolves: https://github.com/llvm/llvm-project/issues/147280

The linked issue describes atomic arithmetic followed by a test whose result could be recovered from the flags a `lock`-prefixed instruction already sets, but which is currently lowered to a cmpxchg loop instead of `lock` + op. One new fold covers the code in the issue: `lock and` sets ZF based on `old & C`, so any comparison asking whether `new = old & C` is zero or nonzero can be answered from ZF; the fold reduces such comparisons to `== 0` / `!= 0` and reads the answer from the flags.

I also refactored `shouldExpandCmpArithRMWInIR` into a thin wrapper around a new dispatching function, `getCmpArithCC`, which returns an `X86::CondCode` directly; this deletes the predicate-to-condition-code switch that previously lived later in `emitCmpArithAtomicRMWIntrinsic`. The per-operation cases (add, sub, or, and, xor) are broken out into helper functions written in guard-clause style to reduce nesting, since the original matcher had everything in one deeply nested function that was hard to read. The helpers are named so it is straightforward to add new cases or move them around as desired.
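For illustration, this is the shape of IR the new `lock and` fold recognizes (a minimal sketch mirroring the added test file; the function name and the constant -2 are just example choices, and any constant C whose negation is a power of two works the same way):

```llvm
define i1 @and_flag_test(ptr %p) {
  ; The stored value is %old & -2; asking whether it is nonzero is the same
  ; as asking whether %old ugt 1 (i.e. %old ugt ~C for C = -2), so after the
  ; fold the answer is read from the ZF set by `lock andq $-2`.
  %old = atomicrmw and ptr %p, i64 -2 seq_cst
  %r = icmp ugt i64 %old, 1
  ret i1 %r
}
```

With the fold this compiles to `lock andq $-2, (%rdi)` followed by `setne %al` instead of a cmpxchg loop, as the first test in the new file checks.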
@@ -32563,68 +32563,168 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
   AI->eraseFromParent();
 }
 
-static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
-  using namespace llvm::PatternMatch;
-  if (!AI->hasOneUse())
-    return false;
-
-  Value *Op = AI->getOperand(1);
-  CmpPredicate Pred;
-  const Instruction *I = AI->user_back();
-  AtomicRMWInst::BinOp Opc = AI->getOperation();
-  if (Opc == AtomicRMWInst::Add) {
-    if (match(I, m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
-    if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value())))) {
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
-        return true;
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-        return true;
-    }
-    return false;
-  }
-  if (Opc == AtomicRMWInst::Sub) {
-    if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
-    if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op))))) {
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
-        return true;
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-        return true;
-    }
-    return false;
-  }
-  if ((Opc == AtomicRMWInst::Or &&
-       match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value())))) ||
-      (Opc == AtomicRMWInst::And &&
-       match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))))) {
-    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
-             Pred == CmpInst::ICMP_SLT;
-    if (match(I->user_back(),
-              m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-      return true;
-    return false;
-  }
-  if (Opc == AtomicRMWInst::Xor) {
-    if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
-      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
-    if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value())))) {
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
-        return true;
-      if (match(I->user_back(),
-                m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
-        return true;
-    }
-    return false;
-  }
-
-  return false;
-}
+static X86::CondCode matchSignedNewValueCC(const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  if (match(I->user_back(),
+            m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
+    return X86::COND_S;
+  if (match(I->user_back(),
+            m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
+    return X86::COND_NS;
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchAddCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Folded from icmp eq/ne (old + Op), 0 to icmp eq/ne old, -Op.
+  if (match(I, m_c_ICmp(Pred, m_Neg(m_Specific(Op)), m_Value()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+  }
+
+  // Non-folded SF form: %new = add %old, Op; icmp slt/sgt %new, 0/-1
+  // lock add sets SF on the new value directly.
+  if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value()))))
+    return matchSignedNewValueCC(I);
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchSubCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Folded from icmp eq/ne (old - Op), 0 to icmp eq/ne old, Op.
+  if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+  }
+
+  // Non-folded SF form: %new = sub %old, Op; icmp slt/sgt %new, 0/-1
+  // lock sub sets SF on the new value directly.
+  if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op)))))
+    return matchSignedNewValueCC(I);
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchOrCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Non-folded form: %new = or %old, Op; icmp P %new, 0/-1
+  // lock or sets ZF/SF on the new value directly.
+  if (!match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))))
+    return X86::COND_INVALID;
+
+  if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+    if (Pred == CmpInst::ICMP_SLT)
+      return X86::COND_S;
+  }
+  if (match(I->user_back(),
+            m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
+    return X86::COND_NS;
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchAndCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Non-folded form: %new = and %old, Op; icmp P %new, 0/-1
+  // lock and sets ZF/SF on the new value directly.
+  if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value())))) {
+    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
+      if (Pred == CmpInst::ICMP_EQ)
+        return X86::COND_E;
+      if (Pred == CmpInst::ICMP_NE)
+        return X86::COND_NE;
+      if (Pred == CmpInst::ICMP_SLT)
+        return X86::COND_S;
+    }
+    if (match(I->user_back(),
+              m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
+      return X86::COND_NS;
+    return X86::COND_INVALID;
+  }
+
+  // If -C is a power of 2:
+  //   (old & C) == 0  <=>  old ult -C
+  //   (old & C) != 0  <=>  old ugt ~C
+  auto *CI = dyn_cast<ConstantInt>(Op);
+  if (!CI)
+    return X86::COND_INVALID;
+  const APInt &C = CI->getValue();
+  const APInt *K;
+  if (!(-C).isPowerOf2() ||
+      !match(I, m_c_ICmp(Pred, m_Specific(AI), m_APInt(K))))
+    return X86::COND_INVALID;
+  if (Pred == ICmpInst::ICMP_ULT && *K == -C)
+    return X86::COND_E;
+  if (Pred == ICmpInst::ICMP_UGT && *K == ~C)
+    return X86::COND_NE;
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode matchXorCC(const AtomicRMWInst *AI, const Instruction *I) {
+  using namespace llvm::PatternMatch;
+  Value *Op = AI->getOperand(1);
+  CmpPredicate Pred;
+
+  // Folded from icmp eq/ne (old ^ Op), 0 to icmp eq/ne old, Op.
+  if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value()))) {
+    if (Pred == CmpInst::ICMP_EQ)
+      return X86::COND_E;
+    if (Pred == CmpInst::ICMP_NE)
+      return X86::COND_NE;
+  }
+
+  // Non-folded SF form: %new = xor %old, Op; icmp slt/sgt %new, 0/-1
+  // lock xor sets SF on the new value directly.
+  if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))))
+    return matchSignedNewValueCC(I);
+
+  return X86::COND_INVALID;
+}
+
+static X86::CondCode getCmpArithCC(const AtomicRMWInst *AI) {
+  if (!AI->hasOneUse())
+    return X86::COND_INVALID;
+
+  const Instruction *I = AI->user_back();
+  switch (AI->getOperation()) {
+  case AtomicRMWInst::Add:
+    return matchAddCC(AI, I);
+  case AtomicRMWInst::Sub:
+    return matchSubCC(AI, I);
+  case AtomicRMWInst::Or:
+    return matchOrCC(AI, I);
+  case AtomicRMWInst::And:
+    return matchAndCC(AI, I);
+  case AtomicRMWInst::Xor:
+    return matchXorCC(AI, I);
+  default:
+    return X86::COND_INVALID;
+  }
+}
+
+static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
+  return getCmpArithCC(AI) != X86::COND_INVALID;
+}
 
 void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
@@ -32639,24 +32739,9 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
     assert(TempI->hasOneUse() && "Must have one use");
     ICI = cast<ICmpInst>(TempI->user_back());
   }
-  X86::CondCode CC = X86::COND_INVALID;
-  ICmpInst::Predicate Pred = ICI->getPredicate();
-  switch (Pred) {
-  default:
-    llvm_unreachable("Not supported Pred");
-  case CmpInst::ICMP_EQ:
-    CC = X86::COND_E;
-    break;
-  case CmpInst::ICMP_NE:
-    CC = X86::COND_NE;
-    break;
-  case CmpInst::ICMP_SLT:
-    CC = X86::COND_S;
-    break;
-  case CmpInst::ICMP_SGT:
-    CC = X86::COND_NS;
-    break;
-  }
+  X86::CondCode CC = getCmpArithCC(AI);
+  assert(CC != X86::COND_INVALID && "emitCmpArithAtomicRMWIntrinsic called "
+                                    "without a recognised pattern");
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
   switch (AI->getOperation()) {
   default:
llvm/test/CodeGen/X86/atomic-lock-and-setcc-folded.ll (new file, +59 lines)
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; fold (old & -2) != 0.
+define i1 @lock_and_setne_folded(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_setne_folded:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lock andq $-2, (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -2 seq_cst
+  %r = icmp ugt i64 %old, 1
+  ret i1 %r
+}
+
+; fold (old & -2) == 0.
+define i1 @lock_and_sete_folded(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_sete_folded:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lock andq $-2, (%rdi)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -2 seq_cst
+  %r = icmp ult i64 %old, 2
+  ret i1 %r
+}
+
+; folds with C = -4.
+define i1 @lock_and_setne_folded_c4(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_setne_folded_c4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lock andq $-4, (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -4 seq_cst
+  %r = icmp ugt i64 %old, 3
+  ret i1 %r
+}
+
+; negative test: no fold when -C is not a power of 2.
+define i1 @lock_and_setne_not_folded(ptr %p) nounwind {
+; CHECK-LABEL: lock_and_setne_not_folded:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    andq $-3, %rcx
+; CHECK-NEXT:    lock cmpxchgq %rcx, (%rdi)
+; CHECK-NEXT:    jne .LBB3_1
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
+; CHECK-NEXT:    cmpq $3, %rax
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+  %old = atomicrmw and ptr %p, i64 -3 seq_cst
+  %r = icmp ugt i64 %old, 2
+  ret i1 %r
+}