The fix does the following in expandPartwordCmpXchg and insertRMWCmpXchgLoop: - Issues volatile operations in the emulation loops if the original operation is volatile. - A preheader load is used for initializing the "cmp" and "new" values of the cmpxchg in the loop; this load is now made atomic. This is done under a target hook (`issueAtomicInitLoadForAtomicEmulation()`), to allow backends to migrate independently. - `processAtomicInstr` is called on this load, to massage it into something that can be lowered in SelectionDAG / GISel. - This caused 3 kinds of failures: 1. Failures caused by changes to codegen: updated these either using the scripts, or mechanically (using claude) to match the new codegen. 2. Crashes caused by newly created atomic loads not being processed by AtomicExpandPass. (The same atomic load, when tested in an independent test, does not cause a crash.) To fix these, added recursive calls to processAtomicInstr on the newly created atomic loads. These calls convert the loads to libcalls, or cast them to integer types. 3. Crashes in X86, AMDGPU, and AArch64 caused by unhandled vector types. These loads crash even with upstream LLVM, due to the lack of support in these targets for vector atomic loads (the corresponding vector atomicrmw instructions are supported). Disabled issuing atomic loads for these backends. Will follow up with individual PRs to revert to default behavior.
1083 lines
42 KiB
LLVM
1083 lines
42 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: not llc < %s -mcpu=sm_90 -mattr=+ptx82 2>&1 | FileCheck %s --check-prefix=ERROR
|
|
; RUN: not llc < %s -mcpu=sm_80 -mattr=+ptx84 2>&1 | FileCheck %s --check-prefix=ERROR
|
|
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx84 | FileCheck %s --check-prefix=CHECK
|
|
; RUN: %if ptxas-sm_90 && ptxas-isa-8.4 %{ llc < %s -mcpu=sm_90 -mattr=+ptx84 | %ptxas-verify -arch=sm_90 %}
|
|
|
|
;; TODO: Update cmpxchg.py so that it can automatically generate the IR for
|
|
;; these test cases.
|
|
|
|
;; Every function in this file is compiled for the 64-bit NVPTX CUDA target.
target triple = "nvptx64-nvidia-cuda"
|
|
|
|
;; Check that the first couple of error messages are correct.
|
|
; ERROR: error: unsupported atomicrmw xchg: target supports atomics up to 8 bytes, but this atomic accesses 16 bytes
|
|
; ERROR: error: unsupported atomicrmw xchg: target supports atomics up to 8 bytes, but this atomic accesses 16 bytes
|
|
|
|
;; i128 atomicrmw xchg through a generic (`ptr`) pointer with release ordering
;; and no syncscope. The checks expect one atom.release.sys.exch.b128 with no
;; state-space qualifier, wrapped in a scoped block that packs/unpacks the
;; 128-bit value to and from two 64-bit registers.
define i128 @test_xchg_generic(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_generic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_generic_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_generic_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release, no syncscope) on an addrspace(1) pointer.
;; The checks expect the `.global` state-space qualifier on the atom
;; instruction: atom.release.sys.global.exch.b128.
define i128 @test_xchg_global(ptr addrspace(1) %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_global(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_global_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_global_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.sys.global.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr addrspace(1) %addr, i128 %amt release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release, no syncscope) on an addrspace(3) pointer.
;; The checks expect the `.shared` state-space qualifier on the atom
;; instruction: atom.release.sys.shared.exch.b128.
define i128 @test_xchg_shared(ptr addrspace(3) %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_shared(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.sys.shared.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr addrspace(3) %addr, i128 %amt release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release, no syncscope) on an addrspace(7) pointer.
;; The checks expect the `.shared::cluster` state-space qualifier on the atom
;; instruction: atom.release.sys.shared::cluster.exch.b128.
define i128 @test_xchg_shared_cluster(ptr addrspace(7) %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_shared_cluster(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_cluster_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_cluster_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.sys.shared::cluster.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr addrspace(7) %addr, i128 %amt release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release) on a generic pointer with syncscope("block").
;; The checks expect the `.cta` scope qualifier: atom.release.cta.exch.b128.
define i128 @test_xchg_block(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_block(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_block_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_block_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.cta.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt syncscope("block") release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release) on a generic pointer with syncscope("cluster").
;; The checks expect the `.cluster` scope qualifier:
;; atom.release.cluster.exch.b128.
define i128 @test_xchg_cluster(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_cluster(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_cluster_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_cluster_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.cluster.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt syncscope("cluster") release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release) on a generic pointer with syncscope("device").
;; The checks expect the `.gpu` scope qualifier: atom.release.gpu.exch.b128.
define i128 @test_xchg_gpu(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_gpu(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_gpu_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_gpu_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.gpu.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt syncscope("device") release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg (release) on a generic pointer with no explicit
;; syncscope. The checks expect the `.sys` scope qualifier:
;; atom.release.sys.exch.b128.
;; NOTE(review): the IR body is the same as test_xchg_generic; this copy sits
;; in the scope group to document the default-scope case.
define i128 @test_xchg_sys(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_sys(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_sys_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_sys_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg on a generic pointer with monotonic ordering.
;; The checks expect the `.relaxed` semantics qualifier:
;; atom.relaxed.sys.exch.b128.
define i128 @test_xchg_relaxed(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_relaxed(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_relaxed_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_relaxed_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.relaxed.sys.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt monotonic
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg on a generic pointer with acquire ordering.
;; The checks expect the `.acquire` semantics qualifier:
;; atom.acquire.sys.exch.b128.
define i128 @test_xchg_acquire(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_acquire(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acquire_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acquire_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.acquire.sys.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt acquire
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg on a generic pointer with release ordering.
;; The checks expect the `.release` semantics qualifier:
;; atom.release.sys.exch.b128.
define i128 @test_xchg_release(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_release(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_release_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_release_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt release
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 atomicrmw xchg on a generic pointer with acq_rel ordering.
;; The checks expect the `.acq_rel` semantics qualifier:
;; atom.acq_rel.sys.exch.b128.
define i128 @test_xchg_acq_rel(ptr %addr, i128 %amt) {
|
|
; CHECK-LABEL: test_xchg_acq_rel(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<6>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acq_rel_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acq_rel_param_1];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 amt, dst;
|
|
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
|
|
; CHECK-NEXT: atom.acq_rel.sys.exch.b128 dst, [%rd1], amt;
|
|
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%old = atomicrmw xchg ptr %addr, i128 %amt acq_rel
|
|
ret i128 %old
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic, no syncscope) through a generic pointer.
;; The checks expect one atom.relaxed.sys.cas.b128 with no state-space
;; qualifier. The function returns %new rather than the cmpxchg result, so the
;; st.param check stores {%rd4, %rd5} (loaded from param_2), not dst.
define i128 @test_cmpxchg_generic(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_generic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_generic_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_generic_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_generic_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic, no syncscope) on an addrspace(1) pointer.
;; The checks expect the `.global` qualifier: atom.relaxed.sys.global.cas.b128.
;; The function returns %new rather than the cmpxchg result, so the st.param
;; check stores {%rd4, %rd5} (loaded from param_2), not dst.
define i128 @test_cmpxchg_global(ptr addrspace(1) %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_global(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_global_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_global_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_global_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.sys.global.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr addrspace(1) %addr, i128 %cmp, i128 %new monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic, no syncscope) on an addrspace(3) pointer.
;; The checks expect the `.shared` qualifier: atom.relaxed.sys.shared.cas.b128.
;; The function returns %new rather than the cmpxchg result, so the st.param
;; check stores {%rd4, %rd5} (loaded from param_2), not dst.
define i128 @test_cmpxchg_shared(ptr addrspace(3) %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_shared(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.sys.shared.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr addrspace(3) %addr, i128 %cmp, i128 %new monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic) on a generic pointer with
;; syncscope("block"). The checks expect the `.cta` scope qualifier:
;; atom.relaxed.cta.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_block(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_block(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_block_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_block_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_block_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.cta.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("block") monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic) on a generic pointer with
;; syncscope("cluster"). The checks expect the `.cluster` scope qualifier:
;; atom.relaxed.cluster.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_cluster(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_cluster(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_cluster_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_cluster_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_cluster_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.cluster.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("cluster") monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic) on a generic pointer with
;; syncscope("device"). The checks expect the `.gpu` scope qualifier:
;; atom.relaxed.gpu.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_gpu(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_gpu(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_gpu_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_gpu_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_gpu_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.gpu.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("device") monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg (monotonic/monotonic, no syncscope) on an addrspace(7) pointer.
;; The checks expect the `.shared::cluster` qualifier:
;; atom.relaxed.sys.shared::cluster.cas.b128. The function returns %new rather
;; than the cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2).
define i128 @test_cmpxchg_shared_cluster(ptr addrspace(7) %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_shared_cluster(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_cluster_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_cluster_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_cluster_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.sys.shared::cluster.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr addrspace(7) %addr, i128 %cmp, i128 %new monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering monotonic and failure ordering monotonic.
;; The checks expect atom.relaxed.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_monotonic_monotonic(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_monotonic_monotonic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_monotonic_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_monotonic_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_monotonic_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering monotonic and failure ordering acquire.
;; The checks expect atom.acquire.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_monotonic_acquire(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_monotonic_acquire(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_acquire_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_acquire_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_acquire_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic acquire
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering monotonic and failure ordering seq_cst.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_monotonic_seq_cst(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic seq_cst
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering acquire and failure ordering monotonic.
;; The checks expect atom.acquire.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_acquire_monotonic(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_acquire_monotonic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_monotonic_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_monotonic_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_monotonic_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering acquire and failure ordering acquire.
;; The checks expect atom.acquire.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_acquire_acquire(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_acquire_acquire(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_acquire_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_acquire_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_acquire_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire acquire
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering acquire and failure ordering seq_cst.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_acquire_seq_cst(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire seq_cst
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering release and failure ordering monotonic.
;; The checks expect atom.release.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_release_monotonic(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_release_monotonic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_monotonic_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_monotonic_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_monotonic_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.release.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering release and failure ordering acquire.
;; The checks expect the two orderings to be merged into
;; atom.acq_rel.sys.cas.b128, with no fence. The function returns %new rather
;; than the cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2).
define i128 @test_cmpxchg_release_acquire(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_release_acquire(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_acquire_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_acquire_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_acquire_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release acquire
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering release and failure ordering seq_cst.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_release_seq_cst(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release seq_cst
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering acq_rel and failure ordering monotonic.
;; The checks expect atom.acq_rel.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_acq_rel_monotonic(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_acq_rel_monotonic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_monotonic_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_monotonic_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_monotonic_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering acq_rel and failure ordering acquire.
;; The checks expect atom.acq_rel.sys.cas.b128 and no fence. The function
;; returns %new rather than the cmpxchg result, so st.param stores
;; {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_acq_rel_acquire(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_acq_rel_acquire(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_acquire_param_0];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_acquire_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_acquire_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel acquire
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering acq_rel and failure ordering seq_cst.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_acq_rel_seq_cst(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel seq_cst
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering seq_cst and failure ordering monotonic.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_seq_cst_monotonic(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst monotonic
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering seq_cst and failure ordering acquire.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_seq_cst_acquire(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst acquire
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 cmpxchg with success ordering seq_cst and failure ordering seq_cst.
;; The checks expect a leading fence.sc.sys followed by
;; atom.acquire.sys.cas.b128. The function returns %new rather than the
;; cmpxchg result, so st.param stores {%rd4, %rd5} (from param_2), not dst.
define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
|
|
; CHECK-LABEL: test_cmpxchg_seq_cst_seq_cst(
|
|
; CHECK: {
|
|
; CHECK-NEXT: .reg .b64 %rd<8>;
|
|
; CHECK-EMPTY:
|
|
; CHECK-NEXT: // %bb.0:
|
|
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
|
|
; CHECK-NEXT: fence.sc.sys;
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
|
|
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
|
|
; CHECK-NEXT: {
|
|
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
|
|
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
|
|
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
|
|
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
|
|
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
|
|
; CHECK-NEXT: }
|
|
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
|
|
; CHECK-NEXT: ret;
|
|
%pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst seq_cst
|
|
ret i128 %new
|
|
}
|
|
|
|
;; i128 atomicrmw and (monotonic), expected to lower to a compare-and-swap loop.
;; Before the loop, an atom.relaxed.sys.cas.b128 with cmp = swap = 0 produces
;; the starting value in {%rd12, %rd13}. Each iteration ANDs both 64-bit halves
;; with %val and retries the cas until the value it returns equals the value
;; that was expected.
define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_and(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_and_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_and_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB34_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    and.b64 %rd7, %rd12, %rd4;
; CHECK-NEXT:    and.b64 %rd8, %rd13, %rd5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd8};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p1, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p1 bra $L__BB34_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw and ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
;; i128 atomicrmw or (monotonic), expected to lower to a compare-and-swap loop.
;; Before the loop, an atom.relaxed.sys.cas.b128 with cmp = swap = 0 produces
;; the starting value in {%rd12, %rd13}. Each iteration ORs both 64-bit halves
;; with %val and retries the cas until the value it returns equals the value
;; that was expected.
define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_or(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_or_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_or_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB35_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    or.b64 %rd7, %rd12, %rd4;
; CHECK-NEXT:    or.b64 %rd8, %rd13, %rd5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd8};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p1, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p1 bra $L__BB35_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw or ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
;; i128 atomicrmw xor (monotonic), expected to lower to a compare-and-swap loop.
;; Before the loop, an atom.relaxed.sys.cas.b128 with cmp = swap = 0 produces
;; the starting value in {%rd12, %rd13}. Each iteration XORs both 64-bit halves
;; with %val and retries the cas until the value it returns equals the value
;; that was expected.
define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_xor(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_xor_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_xor_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB36_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    xor.b64 %rd7, %rd12, %rd4;
; CHECK-NEXT:    xor.b64 %rd8, %rd13, %rd5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd7, %rd8};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p1, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p1 bra $L__BB36_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw xor ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
;; i128 atomicrmw min (signed, monotonic), expected to lower to a
;; compare-and-swap loop. Before the loop, an atom.relaxed.sys.cas.b128 with
;; cmp = swap = 0 produces the starting value in {%rd12, %rd13}. The 128-bit
;; signed "less than" is built from an unsigned compare of the first registers
;; of each pair (setp.lt.u64), an equality test and a signed compare of the
;; second registers (setp.lt.s64); selp then picks the value to swap in.
define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_min(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<7>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_min_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_min_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB37_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    setp.lt.u64 %p1, %rd12, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd13, %rd5;
; CHECK-NEXT:    and.pred %p3, %p2, %p1;
; CHECK-NEXT:    setp.lt.s64 %p4, %rd13, %rd5;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    selp.b64 %rd7, %rd13, %rd5, %p5;
; CHECK-NEXT:    selp.b64 %rd8, %rd12, %rd4, %p5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd8, %rd7};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p6, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p6 bra $L__BB37_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw min ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
;; i128 atomicrmw max (signed, monotonic), expected to lower to a
;; compare-and-swap loop. Before the loop, an atom.relaxed.sys.cas.b128 with
;; cmp = swap = 0 produces the starting value in {%rd12, %rd13}. The 128-bit
;; signed "greater than" is built from an unsigned compare of the first
;; registers of each pair (setp.gt.u64), an equality test and a signed compare
;; of the second registers (setp.gt.s64); selp then picks the value to swap in.
define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_max(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<7>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_max_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_max_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB38_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    setp.gt.u64 %p1, %rd12, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd13, %rd5;
; CHECK-NEXT:    and.pred %p3, %p2, %p1;
; CHECK-NEXT:    setp.gt.s64 %p4, %rd13, %rd5;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    selp.b64 %rd7, %rd13, %rd5, %p5;
; CHECK-NEXT:    selp.b64 %rd8, %rd12, %rd4, %p5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd8, %rd7};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p6, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p6 bra $L__BB38_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw max ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
;; i128 atomicrmw umin (monotonic), expected to lower to a compare-and-swap
;; loop. Before the loop, an atom.relaxed.sys.cas.b128 with cmp = swap = 0
;; produces the starting value in {%rd12, %rd13}. The 128-bit unsigned
;; "less than" uses setp.lt.u64 on both register pairs plus an equality test
;; on the second registers; selp then picks the value to swap in.
define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_umin(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<7>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umin_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_umin_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB39_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    setp.lt.u64 %p1, %rd12, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd13, %rd5;
; CHECK-NEXT:    and.pred %p3, %p2, %p1;
; CHECK-NEXT:    setp.lt.u64 %p4, %rd13, %rd5;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    selp.b64 %rd7, %rd13, %rd5, %p5;
; CHECK-NEXT:    selp.b64 %rd8, %rd12, %rd4, %p5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd8, %rd7};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p6, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p6 bra $L__BB39_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw umin ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
;; i128 atomicrmw umax (monotonic), expected to lower to a compare-and-swap
;; loop. Before the loop, an atom.relaxed.sys.cas.b128 with cmp = swap = 0
;; produces the starting value in {%rd12, %rd13}. The 128-bit unsigned
;; "greater than" uses setp.gt.u64 on both register pairs plus an equality
;; test on the second registers; selp then picks the value to swap in.
define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_umax(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<7>;
; CHECK-NEXT:    .reg .b64 %rd<14>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umax_param_1];
; CHECK-NEXT:    ld.param.b64 %rd3, [test_atomicrmw_umax_param_0];
; CHECK-NEXT:    mov.b64 %rd6, 0;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd6, %rd6};
; CHECK-NEXT:    mov.b128 swap, {%rd6, %rd6};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:  $L__BB40_1: // %atomicrmw.start
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    setp.gt.u64 %p1, %rd12, %rd4;
; CHECK-NEXT:    setp.eq.b64 %p2, %rd13, %rd5;
; CHECK-NEXT:    and.pred %p3, %p2, %p1;
; CHECK-NEXT:    setp.gt.u64 %p4, %rd13, %rd5;
; CHECK-NEXT:    or.pred %p5, %p3, %p4;
; CHECK-NEXT:    selp.b64 %rd7, %rd13, %rd5, %p5;
; CHECK-NEXT:    selp.b64 %rd8, %rd12, %rd4, %p5;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 cmp, swap, dst;
; CHECK-NEXT:    mov.b128 cmp, {%rd12, %rd13};
; CHECK-NEXT:    mov.b128 swap, {%rd8, %rd7};
; CHECK-NEXT:    atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT:    mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    xor.b64 %rd9, %rd2, %rd13;
; CHECK-NEXT:    xor.b64 %rd10, %rd1, %rd12;
; CHECK-NEXT:    or.b64 %rd11, %rd10, %rd9;
; CHECK-NEXT:    setp.ne.b64 %p6, %rd11, 0;
; CHECK-NEXT:    mov.b64 %rd12, %rd1;
; CHECK-NEXT:    mov.b64 %rd13, %rd2;
; CHECK-NEXT:    @%p6 bra $L__BB40_1;
; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT:    ret;
  %ret = atomicrmw umax ptr %ptr, i128 %val monotonic
  ret i128 %ret
}
|
|
|
|
|
|
;; 16-byte-aligned i128 global in addrspace(3); it is the exchange target of
;; test_atomicrmw_xchg_const below.
@si128 = internal addrspace(3) global i128 0, align 16

;; atomicrmw xchg (monotonic) of the constant 23 into @si128.
;; Expected PTX: the global appears inside the function as a demoted .shared
;; variable, the constant is materialised as the register pair {23, 0}, and the
;; operation is a single atom.relaxed.sys.shared.exch.b128. The result
;; (%rd3, %rd4) is unused because the function returns void.
define void @test_atomicrmw_xchg_const() {
; CHECK-LABEL: test_atomicrmw_xchg_const(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<5>;
; CHECK-NEXT:    // demoted variable
; CHECK-NEXT:    .shared .align 16 .b8 si128[16];
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    mov.b64 %rd1, 0;
; CHECK-NEXT:    mov.b64 %rd2, 23;
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b128 amt, dst;
; CHECK-NEXT:    mov.b128 amt, {%rd2, %rd1};
; CHECK-NEXT:    atom.relaxed.sys.shared.exch.b128 dst, [si128], amt;
; CHECK-NEXT:    mov.b128 {%rd3, %rd4}, dst;
; CHECK-NEXT:    }
; CHECK-NEXT:    ret;
  %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 monotonic
  ret void
}
|