Files
llvm-project/llvm/test/CodeGen/PowerPC/atomic-float.ll
Akshay Deodhar 184f236a18 [AtomicExpandPass] Preserve atomic and volatile nature of emulated operations (#188361)
The fix does the following in expandPartwordCmpXchg and
insertRMWCmpXchgLoop.

- Issues volatile operations in the emulation loops if the original
operation is volatile.
- A preheader load is used for initializing the "cmp" and "new" values
of the cmpxchg in the loop. Makes this load atomic. This is done under a
target hook (`issueAtomicInitLoadForAtomicEmulation()`), to allow
backends to migrate independently.
- `processAtomicInstr` is called on this load, to massage it into
something that can be lowered in SelectionDAG / GISel.
- This caused 3 kinds of failures.

1. Caused by change to codegen: updated these either using the scripts,
or mechanically (using claude) to match the new codegen.
2. Crashes caused by newly created atomic loads not being processed by
AtomicExpandPass. (The atomic load, when tested in an independent test, does
not cause a crash). To fix these, added recursive calls to
processAtomicInstr on the newly created atomic loads. These calls
convert the loads to libcalls, or cast them to integer types.
3. Crashes in X86, AMDGPU, and AArch64 caused by unhandled vector types.
These loads crash even with upstream LLVM, due to the lack of support in
these targets for vector atomic loads (the corresponding vector
atomicrmw instructions are supported). Disabled issuing atomic loads for
these backends. Will follow up with individual PRs to revert to default
behavior.
2026-04-30 09:31:39 -07:00

94 lines
3.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-unknown-unknown \
; RUN: < %s | FileCheck --check-prefix=CHECK-64 %s
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc-unknown-unknown \
; RUN: < %s | FileCheck --check-prefix=CHECK-32 %s
define float @test_add(ptr %ptr, float %incr) {
; CHECK-64-LABEL: test_add:
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: sync
; CHECK-64-NEXT: lwz 4, 0(3)
; CHECK-64-NEXT: stw 4, -4(1)
; CHECK-64-NEXT: lfs 0, -4(1)
; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-64-NEXT: # =>This Loop Header: Depth=1
; CHECK-64-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-64-NEXT: fadds 2, 0, 1
; CHECK-64-NEXT: stfs 2, -8(1)
; CHECK-64-NEXT: stfs 0, -12(1)
; CHECK-64-NEXT: lwz 5, -8(1)
; CHECK-64-NEXT: lwz 6, -12(1)
; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.start
; CHECK-64-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-64-NEXT: lwarx 4, 0, 3
; CHECK-64-NEXT: cmplw 4, 6
; CHECK-64-NEXT: bne- 0, .LBB0_5
; CHECK-64-NEXT: # %bb.3: # %cmpxchg.fencedstore
; CHECK-64-NEXT: #
; CHECK-64-NEXT: stwcx. 5, 0, 3
; CHECK-64-NEXT: creqv 20, 20, 20
; CHECK-64-NEXT: bne- 0, .LBB0_2
; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.end
; CHECK-64-NEXT: #
; CHECK-64-NEXT: stw 4, -16(1)
; CHECK-64-NEXT: lfs 0, -16(1)
; CHECK-64-NEXT: bc 4, 20, .LBB0_1
; CHECK-64-NEXT: b .LBB0_6
; CHECK-64-NEXT: .LBB0_5: # %cmpxchg.nostore
; CHECK-64-NEXT: #
; CHECK-64-NEXT: crxor 20, 20, 20
; CHECK-64-NEXT: b .LBB0_4
; CHECK-64-NEXT: .LBB0_6: # %atomicrmw.end
; CHECK-64-NEXT: fmr 1, 0
; CHECK-64-NEXT: lwsync
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: test_add:
; CHECK-32: # %bb.0: # %entry
; CHECK-32-NEXT: stwu 1, -32(1)
; CHECK-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-32-NEXT: sync
; CHECK-32-NEXT: lwz 4, 0(3)
; CHECK-32-NEXT: stw 4, 28(1)
; CHECK-32-NEXT: lfs 0, 28(1)
; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-32-NEXT: # =>This Loop Header: Depth=1
; CHECK-32-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-32-NEXT: fadds 2, 0, 1
; CHECK-32-NEXT: stfs 2, 24(1)
; CHECK-32-NEXT: stfs 0, 20(1)
; CHECK-32-NEXT: lwz 5, 24(1)
; CHECK-32-NEXT: lwz 6, 20(1)
; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.start
; CHECK-32-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-32-NEXT: lwarx 4, 0, 3
; CHECK-32-NEXT: cmplw 4, 6
; CHECK-32-NEXT: bne- 0, .LBB0_5
; CHECK-32-NEXT: # %bb.3: # %cmpxchg.fencedstore
; CHECK-32-NEXT: #
; CHECK-32-NEXT: stwcx. 5, 0, 3
; CHECK-32-NEXT: creqv 20, 20, 20
; CHECK-32-NEXT: bne- 0, .LBB0_2
; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.end
; CHECK-32-NEXT: #
; CHECK-32-NEXT: stw 4, 16(1)
; CHECK-32-NEXT: lfs 0, 16(1)
; CHECK-32-NEXT: bc 4, 20, .LBB0_1
; CHECK-32-NEXT: b .LBB0_6
; CHECK-32-NEXT: .LBB0_5: # %cmpxchg.nostore
; CHECK-32-NEXT: #
; CHECK-32-NEXT: crxor 20, 20, 20
; CHECK-32-NEXT: b .LBB0_4
; CHECK-32-NEXT: .LBB0_6: # %atomicrmw.end
; CHECK-32-NEXT: fmr 1, 0
; CHECK-32-NEXT: lwsync
; CHECK-32-NEXT: addi 1, 1, 32
; CHECK-32-NEXT: blr
entry:
%r = atomicrmw fadd ptr %ptr, float %incr seq_cst
ret float %r
}