Files
llvm-project/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll
Akshay Deodhar 184f236a18 [AtomicExpandPass] Preserve atomic and volatile nature of emulated operations (#188361)
The fix does the following in expandPartwordCmpXchg and
insertRMWCmpXchgLoop.

- Issues volatile operations in the emulation loops if the original
operation is volatile.
- A preheader load is used for initializing the "cmp" and "new" values
of the cmpxchg in the loop. Makes this load atomic. This is done under a
target hook (`issueAtomicInitLoadForAtomicEmulation()`), to allow
backends to migrate independently.
- `processAtomicInstr` is called on this load, to massage it into
something that can be lowered in SelectionDAG / GISel.
- This caused 3 kinds of failures.

1. Caused by change to codegen: updated these either using the scripts
or mechanically (using claude) to match the new codegen.
2. Crashes caused by newly created atomic loads not being processed by
AtomicExpandPass. (The atomic load, if tested in an independent test, does
not cause a crash). To fix these, added recursive calls to
processAtomicInstr on the newly created atomic loads. These calls
convert the loads to libcalls, or cast them to integer types.
3. Crashes in X86, AMDGPU, and AArch64 caused by unhandled vector types.
These loads crash even with upstream LLVM, due to the lack of support in
these targets for vector atomic loads (the corresponding vector
atomicrmw instructions are supported). Disabled issuing atomic loads for
these backends. Will follow up with individual PRs to revert to default
behavior.
2026-04-30 09:31:39 -07:00

1385 lines
49 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32IA %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32IA %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64IA %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64IA %s
define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_1
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: zext.b s2, s0
; RV32I-NEXT: .LBB0_1: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: zext.b a0, a1
; RV32I-NEXT: sltu a0, a0, s2
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: sub a2, a1, a0
; RV32I-NEXT: sb a1, 15(sp)
; RV32I-NEXT: addi a1, sp, 15
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a1, 15(sp)
; RV32I-NEXT: beqz a0, .LBB0_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i8:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
; RV32IA-NEXT: slli a3, a0, 3
; RV32IA-NEXT: li a4, 255
; RV32IA-NEXT: andi a0, a3, 24
; RV32IA-NEXT: lw a5, 0(a2)
; RV32IA-NEXT: sll a3, a4, a3
; RV32IA-NEXT: not a3, a3
; RV32IA-NEXT: zext.b a4, a1
; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB0_3 Depth 2
; RV32IA-NEXT: mv a6, a5
; RV32IA-NEXT: srl a5, a5, a0
; RV32IA-NEXT: zext.b a7, a5
; RV32IA-NEXT: sltu a7, a7, a4
; RV32IA-NEXT: addi a7, a7, -1
; RV32IA-NEXT: and a7, a7, a1
; RV32IA-NEXT: sub a5, a5, a7
; RV32IA-NEXT: zext.b a5, a5
; RV32IA-NEXT: sll a5, a5, a0
; RV32IA-NEXT: and a7, a6, a3
; RV32IA-NEXT: or a7, a7, a5
; RV32IA-NEXT: .LBB0_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a5, (a2)
; RV32IA-NEXT: bne a5, a6, .LBB0_1
; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2
; RV32IA-NEXT: sc.w.rl t0, a7, (a2)
; RV32IA-NEXT: bnez t0, .LBB0_3
; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-NEXT: srl a0, a5, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_1
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: zext.b s2, s0
; RV64I-NEXT: .LBB0_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: zext.b a0, a1
; RV64I-NEXT: sltu a0, a0, s2
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s0
; RV64I-NEXT: sub a2, a1, a0
; RV64I-NEXT: sb a1, 15(sp)
; RV64I-NEXT: addi a1, sp, 15
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_1
; RV64I-NEXT: lbu a1, 15(sp)
; RV64I-NEXT: beqz a0, .LBB0_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: .cfi_restore s2
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i8:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a4, a0, 3
; RV64IA-NEXT: li a5, 255
; RV64IA-NEXT: andi a0, a4, 24
; RV64IA-NEXT: lw a3, 0(a2)
; RV64IA-NEXT: sllw a4, a5, a4
; RV64IA-NEXT: not a4, a4
; RV64IA-NEXT: zext.b a5, a1
; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB0_3 Depth 2
; RV64IA-NEXT: srlw a6, a3, a0
; RV64IA-NEXT: sext.w a7, a3
; RV64IA-NEXT: zext.b t0, a6
; RV64IA-NEXT: sltu t0, t0, a5
; RV64IA-NEXT: addi t0, t0, -1
; RV64IA-NEXT: and t0, t0, a1
; RV64IA-NEXT: sub a6, a6, t0
; RV64IA-NEXT: zext.b a6, a6
; RV64IA-NEXT: sllw a6, a6, a0
; RV64IA-NEXT: and a3, a3, a4
; RV64IA-NEXT: or a6, a3, a6
; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a3, (a2)
; RV64IA-NEXT: bne a3, a7, .LBB0_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2
; RV64IA-NEXT: sc.w.rl t0, a6, (a2)
; RV64IA-NEXT: bnez t0, .LBB0_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: srlw a0, a3, a0
; RV64IA-NEXT: ret
%result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
ret i8 %result
}
define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: .cfi_offset s3, -20
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_2
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: lui s2, 16
; RV32I-NEXT: addi s2, s2, -1
; RV32I-NEXT: and s3, s0, s2
; RV32I-NEXT: .LBB1_1: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
; RV32I-NEXT: sltu a0, a0, s3
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: sub a2, a1, a0
; RV32I-NEXT: sh a1, 10(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
; RV32I-NEXT: lh a1, 10(sp)
; RV32I-NEXT: beqz a0, .LBB1_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: .cfi_restore s3
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i16:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
; RV32IA-NEXT: slli a4, a0, 3
; RV32IA-NEXT: lui a3, 16
; RV32IA-NEXT: andi a0, a4, 24
; RV32IA-NEXT: addi a3, a3, -1
; RV32IA-NEXT: lw a6, 0(a2)
; RV32IA-NEXT: sll a4, a3, a4
; RV32IA-NEXT: not a4, a4
; RV32IA-NEXT: and a5, a1, a3
; RV32IA-NEXT: .LBB1_1: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB1_3 Depth 2
; RV32IA-NEXT: mv a7, a6
; RV32IA-NEXT: srl a6, a6, a0
; RV32IA-NEXT: and t0, a6, a3
; RV32IA-NEXT: sltu t0, t0, a5
; RV32IA-NEXT: addi t0, t0, -1
; RV32IA-NEXT: and t0, t0, a1
; RV32IA-NEXT: sub a6, a6, t0
; RV32IA-NEXT: and a6, a6, a3
; RV32IA-NEXT: sll a6, a6, a0
; RV32IA-NEXT: and t0, a7, a4
; RV32IA-NEXT: or t0, t0, a6
; RV32IA-NEXT: .LBB1_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a6, (a2)
; RV32IA-NEXT: bne a6, a7, .LBB1_1
; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2
; RV32IA-NEXT: sc.w.rl t1, t0, (a2)
; RV32IA-NEXT: bnez t1, .LBB1_3
; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-NEXT: srl a0, a6, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: .cfi_offset s3, -40
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_2
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: lui s2, 16
; RV64I-NEXT: addi s2, s2, -1
; RV64I-NEXT: and s3, s0, s2
; RV64I-NEXT: .LBB1_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: and a0, a1, s2
; RV64I-NEXT: sltu a0, a0, s3
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s0
; RV64I-NEXT: sub a2, a1, a0
; RV64I-NEXT: sh a1, 6(sp)
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
; RV64I-NEXT: lh a1, 6(sp)
; RV64I-NEXT: beqz a0, .LBB1_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: .cfi_restore s2
; RV64I-NEXT: .cfi_restore s3
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i16:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a5, a0, 3
; RV64IA-NEXT: lui a3, 16
; RV64IA-NEXT: andi a0, a5, 24
; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: lw a4, 0(a2)
; RV64IA-NEXT: sllw a5, a3, a5
; RV64IA-NEXT: not a5, a5
; RV64IA-NEXT: and a6, a1, a3
; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB1_3 Depth 2
; RV64IA-NEXT: srlw a7, a4, a0
; RV64IA-NEXT: sext.w t0, a4
; RV64IA-NEXT: and t1, a7, a3
; RV64IA-NEXT: sltu t1, t1, a6
; RV64IA-NEXT: addi t1, t1, -1
; RV64IA-NEXT: and t1, t1, a1
; RV64IA-NEXT: sub a7, a7, t1
; RV64IA-NEXT: and a7, a7, a3
; RV64IA-NEXT: sllw a7, a7, a0
; RV64IA-NEXT: and a4, a4, a5
; RV64IA-NEXT: or a7, a4, a7
; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a4, (a2)
; RV64IA-NEXT: bne a4, t0, .LBB1_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2
; RV64IA-NEXT: sc.w.rl t1, a7, (a2)
; RV64IA-NEXT: bnez t1, .LBB1_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: srlw a0, a4, a0
; RV64IA-NEXT: ret
%result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
ret i16 %result
}
define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_4
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: .LBB2_1: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sltu a0, a1, s0
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: sub a2, a1, a0
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a1, 0(sp)
; RV32I-NEXT: beqz a0, .LBB2_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i32:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lw a2, 0(a0)
; RV32IA-NEXT: .LBB2_1: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB2_3 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: sltu a2, a2, a1
; RV32IA-NEXT: addi a2, a2, -1
; RV32IA-NEXT: and a2, a2, a1
; RV32IA-NEXT: sub a4, a3, a2
; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a2, (a0)
; RV32IA-NEXT: bne a2, a3, .LBB2_1
; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB2_3 Depth=2
; RV32IA-NEXT: sc.w.rl a5, a4, (a0)
; RV32IA-NEXT: bnez a5, .LBB2_3
; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a2
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_4
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: sext.w s2, s0
; RV64I-NEXT: .LBB2_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: sext.w a0, a1
; RV64I-NEXT: sltu a0, a0, s2
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s0
; RV64I-NEXT: subw a2, a1, a0
; RV64I-NEXT: sw a1, 12(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a1, 12(sp)
; RV64I-NEXT: beqz a0, .LBB2_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: .cfi_restore s2
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i32:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lw a2, 0(a0)
; RV64IA-NEXT: sext.w a3, a1
; RV64IA-NEXT: .LBB2_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB2_3 Depth 2
; RV64IA-NEXT: sext.w a4, a2
; RV64IA-NEXT: sltu a5, a4, a3
; RV64IA-NEXT: addi a5, a5, -1
; RV64IA-NEXT: and a5, a5, a1
; RV64IA-NEXT: subw a5, a2, a5
; RV64IA-NEXT: .LBB2_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB2_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a2, (a0)
; RV64IA-NEXT: bne a2, a4, .LBB2_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2
; RV64IA-NEXT: sc.w.rl a6, a5, (a0)
; RV64IA-NEXT: bnez a6, .LBB2_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: mv a0, a2
; RV64IA-NEXT: ret
%result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
ret i32 %result
}
define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a2
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_8
; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: j .LBB3_3
; RV32I-NEXT: .LBB3_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT: sltu a0, a1, s0
; RV32I-NEXT: .LBB3_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a2, a0, s1
; RV32I-NEXT: and a0, a0, s0
; RV32I-NEXT: sltu a3, a4, a2
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: sub a2, a4, a2
; RV32I-NEXT: sub a3, a0, a3
; RV32I-NEXT: sw a4, 8(sp)
; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __atomic_compare_exchange_8
; RV32I-NEXT: lw a4, 8(sp)
; RV32I-NEXT: lw a1, 12(sp)
; RV32I-NEXT: bnez a0, .LBB3_5
; RV32I-NEXT: .LBB3_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: bne a1, s0, .LBB3_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT: sltu a0, a4, s1
; RV32I-NEXT: j .LBB3_2
; RV32I-NEXT: .LBB3_5: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i64:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
; RV32IA-NEXT: .cfi_def_cfa_offset 32
; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: .cfi_offset ra, -4
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a2
; RV32IA-NEXT: mv s1, a1
; RV32IA-NEXT: mv s2, a0
; RV32IA-NEXT: li a1, 0
; RV32IA-NEXT: call __atomic_load_8
; RV32IA-NEXT: mv a4, a0
; RV32IA-NEXT: j .LBB3_3
; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT: sltu a0, a1, s0
; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT: xori a0, a0, 1
; RV32IA-NEXT: neg a0, a0
; RV32IA-NEXT: and a2, a0, s1
; RV32IA-NEXT: and a0, a0, s0
; RV32IA-NEXT: sltu a3, a4, a2
; RV32IA-NEXT: sub a0, a1, a0
; RV32IA-NEXT: sub a2, a4, a2
; RV32IA-NEXT: sub a3, a0, a3
; RV32IA-NEXT: sw a4, 8(sp)
; RV32IA-NEXT: sw a1, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s2
; RV32IA-NEXT: call __atomic_compare_exchange_8
; RV32IA-NEXT: lw a4, 8(sp)
; RV32IA-NEXT: lw a1, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB3_5
; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: bne a1, s0, .LBB3_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT: sltu a0, a4, s1
; RV32IA-NEXT: j .LBB3_2
; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: .cfi_restore ra
; RV32IA-NEXT: .cfi_restore s0
; RV32IA-NEXT: .cfi_restore s1
; RV32IA-NEXT: .cfi_restore s2
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: .cfi_def_cfa_offset 0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_8
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: .LBB3_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: sltu a0, a1, s0
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, s0
; RV64I-NEXT: sub a2, a1, a0
; RV64I-NEXT: sd a1, 0(sp)
; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_8
; RV64I-NEXT: ld a1, 0(sp)
; RV64I-NEXT: beqz a0, .LBB3_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i64:
; RV64IA: # %bb.0:
; RV64IA-NEXT: ld a2, 0(a0)
; RV64IA-NEXT: .LBB3_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB3_3 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: sltu a2, a2, a1
; RV64IA-NEXT: addi a2, a2, -1
; RV64IA-NEXT: and a2, a2, a1
; RV64IA-NEXT: sub a4, a3, a2
; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.d.aqrl a2, (a0)
; RV64IA-NEXT: bne a2, a3, .LBB3_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB3_3 Depth=2
; RV64IA-NEXT: sc.d.rl a5, a4, (a0)
; RV64IA-NEXT: bnez a5, .LBB3_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: mv a0, a2
; RV64IA-NEXT: ret
%result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
ret i64 %result
}
define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_1
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: zext.b s1, s1
; RV32I-NEXT: .LBB4_1: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: zext.b a0, a1
; RV32I-NEXT: sub a2, a0, s1
; RV32I-NEXT: sltu a0, a0, a2
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: sb a1, 3(sp)
; RV32I-NEXT: addi a1, sp, 3
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_1
; RV32I-NEXT: lbu a1, 3(sp)
; RV32I-NEXT: beqz a0, .LBB4_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i8:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
; RV32IA-NEXT: slli a0, a0, 3
; RV32IA-NEXT: li a3, 255
; RV32IA-NEXT: sll a3, a3, a0
; RV32IA-NEXT: lw a4, 0(a2)
; RV32IA-NEXT: andi a0, a0, 24
; RV32IA-NEXT: not a3, a3
; RV32IA-NEXT: zext.b a1, a1
; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB4_3 Depth 2
; RV32IA-NEXT: mv a5, a4
; RV32IA-NEXT: srl a4, a4, a0
; RV32IA-NEXT: zext.b a4, a4
; RV32IA-NEXT: sub a6, a4, a1
; RV32IA-NEXT: sltu a4, a4, a6
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: and a4, a4, a6
; RV32IA-NEXT: sll a4, a4, a0
; RV32IA-NEXT: and a6, a5, a3
; RV32IA-NEXT: or a6, a6, a4
; RV32IA-NEXT: .LBB4_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB4_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a4, (a2)
; RV32IA-NEXT: bne a4, a5, .LBB4_1
; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB4_3 Depth=2
; RV32IA-NEXT: sc.w.rl a7, a6, (a2)
; RV32IA-NEXT: bnez a7, .LBB4_3
; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-NEXT: srl a0, a4, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i8:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_1
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: zext.b s1, s1
; RV64I-NEXT: .LBB4_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: zext.b a0, a1
; RV64I-NEXT: sub a2, a0, s1
; RV64I-NEXT: sltu a0, a0, a2
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a2, a0, a2
; RV64I-NEXT: sb a1, 7(sp)
; RV64I-NEXT: addi a1, sp, 7
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_1
; RV64I-NEXT: lbu a1, 7(sp)
; RV64I-NEXT: beqz a0, .LBB4_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i8:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a0, a0, 3
; RV64IA-NEXT: li a3, 255
; RV64IA-NEXT: sllw a4, a3, a0
; RV64IA-NEXT: lw a3, 0(a2)
; RV64IA-NEXT: andi a0, a0, 24
; RV64IA-NEXT: not a4, a4
; RV64IA-NEXT: zext.b a1, a1
; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB4_3 Depth 2
; RV64IA-NEXT: srlw a5, a3, a0
; RV64IA-NEXT: sext.w a6, a3
; RV64IA-NEXT: zext.b a5, a5
; RV64IA-NEXT: sub a7, a5, a1
; RV64IA-NEXT: sltu a5, a5, a7
; RV64IA-NEXT: addi a5, a5, -1
; RV64IA-NEXT: and a5, a5, a7
; RV64IA-NEXT: sllw a5, a5, a0
; RV64IA-NEXT: and a3, a3, a4
; RV64IA-NEXT: or a5, a3, a5
; RV64IA-NEXT: .LBB4_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a3, (a2)
; RV64IA-NEXT: bne a3, a6, .LBB4_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2
; RV64IA-NEXT: sc.w.rl a7, a5, (a2)
; RV64IA-NEXT: bnez a7, .LBB4_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: srlw a0, a3, a0
; RV64IA-NEXT: ret
%result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
ret i8 %result
}
define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_2
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: lui s2, 16
; RV32I-NEXT: addi s2, s2, -1
; RV32I-NEXT: and s1, s1, s2
; RV32I-NEXT: .LBB5_1: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
; RV32I-NEXT: sub a2, a0, s1
; RV32I-NEXT: sltu a0, a0, a2
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: sh a1, 14(sp)
; RV32I-NEXT: addi a1, sp, 14
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_2
; RV32I-NEXT: lh a1, 14(sp)
; RV32I-NEXT: beqz a0, .LBB5_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i16:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
; RV32IA-NEXT: slli a4, a0, 3
; RV32IA-NEXT: lui a3, 16
; RV32IA-NEXT: andi a0, a4, 24
; RV32IA-NEXT: addi a3, a3, -1
; RV32IA-NEXT: lw a5, 0(a2)
; RV32IA-NEXT: sll a4, a3, a4
; RV32IA-NEXT: not a4, a4
; RV32IA-NEXT: and a1, a1, a3
; RV32IA-NEXT: .LBB5_1: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB5_3 Depth 2
; RV32IA-NEXT: mv a6, a5
; RV32IA-NEXT: srl a5, a5, a0
; RV32IA-NEXT: and a5, a5, a3
; RV32IA-NEXT: sub a7, a5, a1
; RV32IA-NEXT: sltu a5, a5, a7
; RV32IA-NEXT: addi a5, a5, -1
; RV32IA-NEXT: and a5, a5, a7
; RV32IA-NEXT: sll a5, a5, a0
; RV32IA-NEXT: and a7, a6, a4
; RV32IA-NEXT: or a7, a7, a5
; RV32IA-NEXT: .LBB5_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB5_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a5, (a2)
; RV32IA-NEXT: bne a5, a6, .LBB5_1
; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB5_3 Depth=2
; RV32IA-NEXT: sc.w.rl t0, a7, (a2)
; RV32IA-NEXT: bnez t0, .LBB5_3
; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-NEXT: srl a0, a5, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_2
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: lui s2, 16
; RV64I-NEXT: addi s2, s2, -1
; RV64I-NEXT: and s1, s1, s2
; RV64I-NEXT: .LBB5_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: and a0, a1, s2
; RV64I-NEXT: sub a2, a0, s1
; RV64I-NEXT: sltu a0, a0, a2
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a2, a0, a2
; RV64I-NEXT: sh a1, 14(sp)
; RV64I-NEXT: addi a1, sp, 14
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_2
; RV64I-NEXT: lh a1, 14(sp)
; RV64I-NEXT: beqz a0, .LBB5_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: .cfi_restore s2
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i16:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a5, a0, 3
; RV64IA-NEXT: lui a3, 16
; RV64IA-NEXT: andi a0, a5, 24
; RV64IA-NEXT: addi a3, a3, -1
; RV64IA-NEXT: lw a4, 0(a2)
; RV64IA-NEXT: sllw a5, a3, a5
; RV64IA-NEXT: not a5, a5
; RV64IA-NEXT: and a1, a1, a3
; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB5_3 Depth 2
; RV64IA-NEXT: srlw a6, a4, a0
; RV64IA-NEXT: sext.w a7, a4
; RV64IA-NEXT: and a6, a6, a3
; RV64IA-NEXT: sub t0, a6, a1
; RV64IA-NEXT: sltu a6, a6, t0
; RV64IA-NEXT: addi a6, a6, -1
; RV64IA-NEXT: and a6, a6, t0
; RV64IA-NEXT: sllw a6, a6, a0
; RV64IA-NEXT: and a4, a4, a5
; RV64IA-NEXT: or a6, a4, a6
; RV64IA-NEXT: .LBB5_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB5_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a4, (a2)
; RV64IA-NEXT: bne a4, a7, .LBB5_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB5_3 Depth=2
; RV64IA-NEXT: sc.w.rl t0, a6, (a2)
; RV64IA-NEXT: bnez t0, .LBB5_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: srlw a0, a4, a0
; RV64IA-NEXT: ret
  ; i16 has no native atomicrmw on RISC-V: +a configs emulate it with a
  ; cmpxchg (LR/SC) loop on the containing aligned word, masking/shifting
  ; the halfword lane; base-ISA configs loop over __atomic_* libcalls.
  ; The saturating clamp to 0 is the sltu / addi -1 / and sequence above.
  %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
  ret i16 %result
}
define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_4
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: .LBB6_1: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sub a0, a1, s0
; RV32I-NEXT: sltu a2, a1, a0
; RV32I-NEXT: addi a2, a2, -1
; RV32I-NEXT: and a2, a2, a0
; RV32I-NEXT: sw a1, 0(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a1, 0(sp)
; RV32I-NEXT: beqz a0, .LBB6_1
; RV32I-NEXT: # %bb.2: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i32:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lw a2, 0(a0)
; RV32IA-NEXT: .LBB6_1: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB6_3 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: sub a2, a2, a1
; RV32IA-NEXT: sltu a4, a3, a2
; RV32IA-NEXT: addi a4, a4, -1
; RV32IA-NEXT: and a4, a4, a2
; RV32IA-NEXT: .LBB6_3: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB6_1 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a2, (a0)
; RV32IA-NEXT: bne a2, a3, .LBB6_1
; RV32IA-NEXT: # %bb.4: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB6_3 Depth=2
; RV32IA-NEXT: sc.w.rl a5, a4, (a0)
; RV32IA-NEXT: bnez a5, .LBB6_3
; RV32IA-NEXT: # %bb.5: # %atomicrmw.start
; RV32IA-NEXT: # %bb.2: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a2
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_4
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: .LBB6_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: subw a0, a1, s0
; RV64I-NEXT: sext.w a2, a1
; RV64I-NEXT: sltu a2, a2, a0
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: and a2, a2, a0
; RV64I-NEXT: sw a1, 4(sp)
; RV64I-NEXT: addi a1, sp, 4
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a1, 4(sp)
; RV64I-NEXT: beqz a0, .LBB6_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i32:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lw a2, 0(a0)
; RV64IA-NEXT: .LBB6_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB6_3 Depth 2
; RV64IA-NEXT: subw a3, a2, a1
; RV64IA-NEXT: sext.w a4, a2
; RV64IA-NEXT: sltu a2, a4, a3
; RV64IA-NEXT: addi a2, a2, -1
; RV64IA-NEXT: and a3, a2, a3
; RV64IA-NEXT: .LBB6_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB6_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a2, (a0)
; RV64IA-NEXT: bne a2, a4, .LBB6_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB6_3 Depth=2
; RV64IA-NEXT: sc.w.rl a5, a3, (a0)
; RV64IA-NEXT: bnez a5, .LBB6_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: mv a0, a2
; RV64IA-NEXT: ret
  ; Word-sized usub_sat: +a configs use a direct LR.W.AQRL/SC.W.RL cmpxchg
  ; loop (no lane masking needed); base-ISA configs loop over
  ; __atomic_compare_exchange_4 with ordering 5 (seq_cst) in a3/a4.
  %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
  ret i32 %result
}
define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a2
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: mv s2, a0
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: call __atomic_load_8
; RV32I-NEXT: mv a4, a0
; RV32I-NEXT: j .LBB7_3
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT: sltu a3, a1, a0
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT: addi a3, a3, -1
; RV32I-NEXT: and a2, a3, a2
; RV32I-NEXT: and a3, a3, a0
; RV32I-NEXT: sw a4, 8(sp)
; RV32I-NEXT: sw a1, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s2
; RV32I-NEXT: call __atomic_compare_exchange_8
; RV32I-NEXT: lw a4, 8(sp)
; RV32I-NEXT: lw a1, 12(sp)
; RV32I-NEXT: bnez a0, .LBB7_5
; RV32I-NEXT: .LBB7_3: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: sltu a0, a4, s1
; RV32I-NEXT: sub a2, a1, s0
; RV32I-NEXT: sub a0, a2, a0
; RV32I-NEXT: sub a2, a4, s1
; RV32I-NEXT: bne a0, a1, .LBB7_1
; RV32I-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT: sltu a3, a4, a2
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_5: # %atomicrmw.end
; RV32I-NEXT: mv a0, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i64:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
; RV32IA-NEXT: .cfi_def_cfa_offset 32
; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: .cfi_offset ra, -4
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a2
; RV32IA-NEXT: mv s1, a1
; RV32IA-NEXT: mv s2, a0
; RV32IA-NEXT: li a1, 0
; RV32IA-NEXT: call __atomic_load_8
; RV32IA-NEXT: mv a4, a0
; RV32IA-NEXT: j .LBB7_3
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT: sltu a3, a1, a0
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT: addi a3, a3, -1
; RV32IA-NEXT: and a2, a3, a2
; RV32IA-NEXT: and a3, a3, a0
; RV32IA-NEXT: sw a4, 8(sp)
; RV32IA-NEXT: sw a1, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s2
; RV32IA-NEXT: call __atomic_compare_exchange_8
; RV32IA-NEXT: lw a4, 8(sp)
; RV32IA-NEXT: lw a1, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB7_5
; RV32IA-NEXT: .LBB7_3: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: sltu a0, a4, s1
; RV32IA-NEXT: sub a2, a1, s0
; RV32IA-NEXT: sub a0, a2, a0
; RV32IA-NEXT: sub a2, a4, s1
; RV32IA-NEXT: bne a0, a1, .LBB7_1
; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT: sltu a3, a4, a2
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_5: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: .cfi_restore ra
; RV32IA-NEXT: .cfi_restore s0
; RV32IA-NEXT: .cfi_restore s1
; RV32IA-NEXT: .cfi_restore s2
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: .cfi_def_cfa_offset 0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: call __atomic_load_8
; RV64I-NEXT: mv a1, a0
; RV64I-NEXT: .LBB7_1: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: sub a0, a1, s0
; RV64I-NEXT: sltu a2, a1, a0
; RV64I-NEXT: addi a2, a2, -1
; RV64I-NEXT: and a2, a2, a0
; RV64I-NEXT: sd a1, 0(sp)
; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_8
; RV64I-NEXT: ld a1, 0(sp)
; RV64I-NEXT: beqz a0, .LBB7_1
; RV64I-NEXT: # %bb.2: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i64:
; RV64IA: # %bb.0:
; RV64IA-NEXT: ld a2, 0(a0)
; RV64IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB7_3 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: sub a2, a2, a1
; RV64IA-NEXT: sltu a4, a3, a2
; RV64IA-NEXT: addi a4, a4, -1
; RV64IA-NEXT: and a4, a4, a2
; RV64IA-NEXT: .LBB7_3: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB7_1 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.d.aqrl a2, (a0)
; RV64IA-NEXT: bne a2, a3, .LBB7_1
; RV64IA-NEXT: # %bb.4: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB7_3 Depth=2
; RV64IA-NEXT: sc.d.rl a5, a4, (a0)
; RV64IA-NEXT: bnez a5, .LBB7_3
; RV64IA-NEXT: # %bb.5: # %atomicrmw.start
; RV64IA-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-NEXT: mv a0, a2
; RV64IA-NEXT: ret
  ; i64 usub_sat: RV64IA uses a native LR.D.AQRL/SC.D.RL cmpxchg loop.
  ; Both RV32 configs (even with +a, per the RV32IA checks above) fall back
  ; to __atomic_load_8 / __atomic_compare_exchange_8 libcalls, computing
  ; the 64-bit subtract/borrow and clamp in register pairs.
  %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
  ret i64 %result
}