[X86] lock opt ptr const inconsistencies (#185195)

Resolves: https://github.com/llvm/llvm-project/issues/147280

The linked issue describes cases where an atomic arithmetic operation followed by a test is currently lowered to a cmpxchg (CAS) loop, even though the test's result could be inferred from the flags set by a single `lock`-prefixed `op`.

There's one fold that handles the issue's exact code: `lock and` sets ZF based on
the result of old & C, so any comparison that is equivalent to testing
new = old & C against zero can be answered with ZF. This fold does just that,
reducing such comparisons to a != 0 or == 0 flag test.

I also decided to refactor `shouldExpandCmpArithRMWInIR` into a
dispatching function and make `getCmpArithCC` just return X86::CondCodes
directly. This deleted the dispatching switch later in the code.

Also, I broke out the different cases of `getCmpArithWithCC` into helper
functions, one per case (add, sub, and, xor, or, add with overflow, sub
with overflow), and rewrote them in guard-clause style to reduce nesting,
since the original `getCmpArithWithCC` handled everything in one deeply
nested function that was hard to read. The helper functions are named so
it's straightforward to add new cases or move them around as desired.
This commit is contained in:
Takashi Idobe
2026-04-28 03:51:36 -04:00
committed by GitHub
parent 427423946a
commit fb4af2af00
2 changed files with 218 additions and 74 deletions

View File

@@ -32563,68 +32563,168 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
AI->eraseFromParent();
}
// Match a sign test of the new value produced by a flag-setting `lock op`.
// The single user of \p I must be either `icmp slt %new, 0` (sign bit set,
// answered by COND_S) or `icmp sgt %new, -1` (sign bit clear, answered by
// COND_NS). Returns COND_INVALID if neither form matches. Callers wrap the
// match on \p I in m_OneUse, so user_back() is its only user.
// NOTE: the diff residue of the pre-refactor shouldExpandCmpArithRMWInIR
// (its old header and hasOneUse check) that was interleaved here has been
// removed; the one-use check now lives in getCmpArithCC.
static X86::CondCode matchSignedNewValueCC(const Instruction *I) {
  using namespace llvm::PatternMatch;
  if (match(I->user_back(),
            m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
    return X86::COND_S;
  if (match(I->user_back(),
            m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
    return X86::COND_NS;
  return X86::COND_INVALID;
}
// Match the comparison pattern fed by an `atomicrmw add` so that it can be
// lowered to a flag-setting `lock add`. \p I is the sole user of \p AI.
// Returns the condition code that answers the comparison, or COND_INVALID.
// NOTE: the stale pre-refactor bool-returning body that was interleaved
// into this span as diff residue has been removed.
static X86::CondCode matchAddCC(const AtomicRMWInst *AI, const Instruction *I) {
  using namespace llvm::PatternMatch;
  Value *Op = AI->getOperand(1);
  CmpPredicate Pred;
  // Folded from icmp eq/ne (old + Op), 0 to icmp eq/ne old, -Op.
  // lock add sets ZF on the sum, which still answers the comparison.
  if (match(I, m_c_ICmp(Pred, m_Neg(m_Specific(Op)), m_Value()))) {
    if (Pred == CmpInst::ICMP_EQ)
      return X86::COND_E;
    if (Pred == CmpInst::ICMP_NE)
      return X86::COND_NE;
  }
  // Non-folded SF form: %new = add %old, Op; icmp slt/sgt %new, 0/-1
  // lock add sets SF on the new value directly.
  if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value()))))
    return matchSignedNewValueCC(I);
  return X86::COND_INVALID;
}
// Match the comparison pattern fed by an `atomicrmw sub` so that it can be
// lowered to a flag-setting `lock sub`. \p I is the sole user of \p AI.
static X86::CondCode matchSubCC(const AtomicRMWInst *AI, const Instruction *I) {
  using namespace llvm::PatternMatch;
  Value *Val = AI->getOperand(1);
  CmpPredicate Pred;
  // ZF form, folded: icmp eq/ne (old - Val), 0 was rewritten into
  // icmp eq/ne old, Val; lock sub still sets ZF on the difference.
  if (match(I, m_c_ICmp(Pred, m_Specific(Val), m_Value())) &&
      (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
    return Pred == CmpInst::ICMP_EQ ? X86::COND_E : X86::COND_NE;
  // SF form, not folded: %new = sub %old, Val; icmp slt/sgt %new, 0/-1.
  // lock sub sets SF on the new value directly.
  if (!match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Val)))))
    return X86::COND_INVALID;
  return matchSignedNewValueCC(I);
}
// Match the comparison pattern fed by an `atomicrmw or` so that it can be
// lowered to a flag-setting `lock or`. \p I is the sole user of \p AI.
static X86::CondCode matchOrCC(const AtomicRMWInst *AI, const Instruction *I) {
  using namespace llvm::PatternMatch;
  // Handle only the non-folded form: %new = or %old, Val; icmp P %new, 0/-1.
  // lock or sets ZF/SF on the new value directly.
  Value *Val = AI->getOperand(1);
  if (!match(I, m_OneUse(m_c_Or(m_Specific(Val), m_Value()))))
    return X86::COND_INVALID;
  const Instruction *Cmp = I->user_back();
  CmpPredicate Pred;
  if (match(Cmp, m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
    if (Pred == CmpInst::ICMP_EQ)
      return X86::COND_E;
    if (Pred == CmpInst::ICMP_NE)
      return X86::COND_NE;
    if (Pred == CmpInst::ICMP_SLT)
      return X86::COND_S;
  }
  // icmp sgt %new, -1 tests that the sign bit is clear.
  bool SignClear =
      match(Cmp, m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes()));
  return SignClear ? X86::COND_NS : X86::COND_INVALID;
}
// Match the comparison pattern fed by an `atomicrmw and` so that it can be
// lowered to a flag-setting `lock and`. \p I is the sole user of \p AI.
// Returns the condition code that answers the comparison, or COND_INVALID.
static X86::CondCode matchAndCC(const AtomicRMWInst *AI, const Instruction *I) {
  using namespace llvm::PatternMatch;
  Value *Op = AI->getOperand(1);
  CmpPredicate Pred;
  // Non-folded form: %new = and %old, Op; icmp P %new, 0/-1
  // lock and sets ZF/SF on the new value directly.
  if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value())))) {
    if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
      if (Pred == CmpInst::ICMP_EQ)
        return X86::COND_E;
      if (Pred == CmpInst::ICMP_NE)
        return X86::COND_NE;
      if (Pred == CmpInst::ICMP_SLT)
        return X86::COND_S;
    }
    // icmp sgt %new, -1 tests that the sign bit is clear.
    if (match(I->user_back(),
              m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
      return X86::COND_NS;
    return X86::COND_INVALID;
  }
  // Folded form: when -C is a power of 2, a zero-test of old & C may have
  // been rewritten into an unsigned compare of old:
  //   (old & C) == 0 <=> old ult -C
  //   (old & C) != 0 <=> old ugt ~C
  // lock and sets ZF on old & C, so ZF still answers the comparison.
  auto *CI = dyn_cast<ConstantInt>(Op);
  if (!CI)
    return X86::COND_INVALID;
  const APInt &C = CI->getValue();
  const APInt *K;
  if (!(-C).isPowerOf2() ||
      !match(I, m_c_ICmp(Pred, m_Specific(AI), m_APInt(K))))
    return X86::COND_INVALID;
  // Use CmpInst::ICMP_* qualifiers for consistency with the other matchers
  // in this file (was ICmpInst::ICMP_*; same enumerators either way).
  if (Pred == CmpInst::ICMP_ULT && *K == -C)
    return X86::COND_E;
  if (Pred == CmpInst::ICMP_UGT && *K == ~C)
    return X86::COND_NE;
  return X86::COND_INVALID;
}
// Match the comparison pattern fed by an `atomicrmw xor` so that it can be
// lowered to a flag-setting `lock xor`. \p I is the sole user of \p AI.
static X86::CondCode matchXorCC(const AtomicRMWInst *AI, const Instruction *I) {
  using namespace llvm::PatternMatch;
  Value *Val = AI->getOperand(1);
  CmpPredicate Pred;
  // ZF form, folded: icmp eq/ne (old ^ Val), 0 was rewritten into
  // icmp eq/ne old, Val; lock xor still sets ZF on the xor result.
  if (match(I, m_c_ICmp(Pred, m_Specific(Val), m_Value())) &&
      (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
    return Pred == CmpInst::ICMP_EQ ? X86::COND_E : X86::COND_NE;
  // SF form, not folded: %new = xor %old, Val; icmp slt/sgt %new, 0/-1.
  // lock xor sets SF on the new value directly.
  if (!match(I, m_OneUse(m_c_Xor(m_Specific(Val), m_Value()))))
    return X86::COND_INVALID;
  return matchSignedNewValueCC(I);
}
// Map an atomicrmw whose single user is a comparison (possibly through one
// intermediate arithmetic instruction) onto the X86 condition code that a
// flag-setting `lock op` makes available. COND_INVALID means "no match".
static X86::CondCode getCmpArithCC(const AtomicRMWInst *AI) {
  // The flags trick only applies when exactly one instruction consumes the
  // RMW's result.
  if (!AI->hasOneUse())
    return X86::COND_INVALID;
  const Instruction *User = AI->user_back();
  const AtomicRMWInst::BinOp Opc = AI->getOperation();
  if (Opc == AtomicRMWInst::Add)
    return matchAddCC(AI, User);
  if (Opc == AtomicRMWInst::Sub)
    return matchSubCC(AI, User);
  if (Opc == AtomicRMWInst::Or)
    return matchOrCC(AI, User);
  if (Opc == AtomicRMWInst::And)
    return matchAndCC(AI, User);
  if (Opc == AtomicRMWInst::Xor)
    return matchXorCC(AI, User);
  return X86::COND_INVALID;
}
static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
return getCmpArithCC(AI) != X86::COND_INVALID;
}
void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
@@ -32639,24 +32739,9 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
assert(TempI->hasOneUse() && "Must have one use");
ICI = cast<ICmpInst>(TempI->user_back());
}
X86::CondCode CC = X86::COND_INVALID;
ICmpInst::Predicate Pred = ICI->getPredicate();
switch (Pred) {
default:
llvm_unreachable("Not supported Pred");
case CmpInst::ICMP_EQ:
CC = X86::COND_E;
break;
case CmpInst::ICMP_NE:
CC = X86::COND_NE;
break;
case CmpInst::ICMP_SLT:
CC = X86::COND_S;
break;
case CmpInst::ICMP_SGT:
CC = X86::COND_NS;
break;
}
X86::CondCode CC = getCmpArithCC(AI);
assert(CC != X86::COND_INVALID && "emitCmpArithAtomicRMWIntrinsic called "
"without a recognised pattern");
Intrinsic::ID IID = Intrinsic::not_intrinsic;
switch (AI->getOperation()) {
default:

View File

@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; fold (old & -2) != 0: the compare `%old ugt 1` is equivalent to
; (old & -2) != 0 because -(-2) == 2 is a power of two, so the backend
; answers it from ZF after `lock andq` instead of emitting a cmpxchg loop.
define i1 @lock_and_setne_folded(ptr %p) nounwind {
; CHECK-LABEL: lock_and_setne_folded:
; CHECK: # %bb.0:
; CHECK-NEXT: lock andq $-2, (%rdi)
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%old = atomicrmw and ptr %p, i64 -2 seq_cst
%r = icmp ugt i64 %old, 1
ret i1 %r
}
; fold (old & -2) == 0: the compare `%old ult 2` is equivalent to
; (old & -2) == 0, so it becomes `lock andq` + sete on ZF.
define i1 @lock_and_sete_folded(ptr %p) nounwind {
; CHECK-LABEL: lock_and_sete_folded:
; CHECK: # %bb.0:
; CHECK-NEXT: lock andq $-2, (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
%old = atomicrmw and ptr %p, i64 -2 seq_cst
%r = icmp ult i64 %old, 2
ret i1 %r
}
; folds with C = -4: `%old ugt 3` (3 == ~(-4)) is equivalent to
; (old & -4) != 0 since -(-4) == 4 is a power of two.
define i1 @lock_and_setne_folded_c4(ptr %p) nounwind {
; CHECK-LABEL: lock_and_setne_folded_c4:
; CHECK: # %bb.0:
; CHECK-NEXT: lock andq $-4, (%rdi)
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
%old = atomicrmw and ptr %p, i64 -4 seq_cst
%r = icmp ugt i64 %old, 3
ret i1 %r
}
; negative test: no fold when -C is not a power of 2. Here -(-3) == 3, so
; the ult/ugt equivalence with (old & -3) != 0 does not hold and the
; cmpxchg loop is kept.
define i1 @lock_and_setne_not_folded(ptr %p) nounwind {
; CHECK-LABEL: lock_and_setne_not_folded:
; CHECK: # %bb.0:
; CHECK-NEXT: movq (%rdi), %rax
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: andq $-3, %rcx
; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
; CHECK-NEXT: jne .LBB3_1
; CHECK-NEXT: # %bb.2: # %atomicrmw.end
; CHECK-NEXT: cmpq $3, %rax
; CHECK-NEXT: setae %al
; CHECK-NEXT: retq
%old = atomicrmw and ptr %p, i64 -3 seq_cst
%r = icmp ugt i64 %old, 2
ret i1 %r
}