4372 lines
185 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=verde < %s | FileCheck -check-prefixes=SI %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX9 %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10 %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11 %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX12 %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX1250 %s
|
|
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG %s
|
|
|
|
define amdgpu_kernel void @test_clmul_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
|
|
; SI-LABEL: test_clmul_i32:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: s_mov_b32 s10, s2
|
|
; SI-NEXT: s_mov_b32 s11, s3
|
|
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SI-NEXT: s_mov_b32 s8, s6
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; SI-NEXT: s_mov_b32 s1, s5
|
|
; SI-NEXT: s_mov_b32 s0, s4
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: v_readfirstlane_b32 s5, v1
|
|
; SI-NEXT: v_readfirstlane_b32 s4, v0
|
|
; SI-NEXT: s_and_b32 s6, s5, 2
|
|
; SI-NEXT: s_and_b32 s7, s5, 1
|
|
; SI-NEXT: s_and_b32 s8, s5, 4
|
|
; SI-NEXT: s_mul_i32 s6, s4, s6
|
|
; SI-NEXT: s_mul_i32 s7, s4, s7
|
|
; SI-NEXT: s_and_b32 s9, s5, 8
|
|
; SI-NEXT: s_mul_i32 s8, s4, s8
|
|
; SI-NEXT: s_xor_b32 s6, s7, s6
|
|
; SI-NEXT: s_and_b32 s10, s5, 16
|
|
; SI-NEXT: s_mul_i32 s9, s4, s9
|
|
; SI-NEXT: s_xor_b32 s6, s6, s8
|
|
; SI-NEXT: s_and_b32 s11, s5, 32
|
|
; SI-NEXT: s_mul_i32 s10, s4, s10
|
|
; SI-NEXT: s_xor_b32 s6, s6, s9
|
|
; SI-NEXT: s_and_b32 s12, s5, 64
|
|
; SI-NEXT: s_mul_i32 s11, s4, s11
|
|
; SI-NEXT: s_xor_b32 s6, s6, s10
|
|
; SI-NEXT: s_and_b32 s13, s5, 0x80
|
|
; SI-NEXT: s_mul_i32 s12, s4, s12
|
|
; SI-NEXT: s_xor_b32 s6, s6, s11
|
|
; SI-NEXT: s_and_b32 s14, s5, 0x100
|
|
; SI-NEXT: s_mul_i32 s13, s4, s13
|
|
; SI-NEXT: s_xor_b32 s6, s6, s12
|
|
; SI-NEXT: s_and_b32 s15, s5, 0x200
|
|
; SI-NEXT: s_mul_i32 s14, s4, s14
|
|
; SI-NEXT: s_xor_b32 s6, s6, s13
|
|
; SI-NEXT: s_and_b32 s16, s5, 0x400
|
|
; SI-NEXT: s_mul_i32 s15, s4, s15
|
|
; SI-NEXT: s_xor_b32 s6, s6, s14
|
|
; SI-NEXT: s_and_b32 s17, s5, 0x800
|
|
; SI-NEXT: s_mul_i32 s16, s4, s16
|
|
; SI-NEXT: s_xor_b32 s6, s6, s15
|
|
; SI-NEXT: s_and_b32 s18, s5, 0x1000
|
|
; SI-NEXT: s_mul_i32 s17, s4, s17
|
|
; SI-NEXT: s_xor_b32 s6, s6, s16
|
|
; SI-NEXT: s_and_b32 s19, s5, 0x2000
|
|
; SI-NEXT: s_mul_i32 s18, s4, s18
|
|
; SI-NEXT: s_xor_b32 s6, s6, s17
|
|
; SI-NEXT: s_and_b32 s20, s5, 0x4000
|
|
; SI-NEXT: s_mul_i32 s19, s4, s19
|
|
; SI-NEXT: s_xor_b32 s6, s6, s18
|
|
; SI-NEXT: s_and_b32 s21, s5, 0x8000
|
|
; SI-NEXT: s_mul_i32 s20, s4, s20
|
|
; SI-NEXT: s_xor_b32 s6, s6, s19
|
|
; SI-NEXT: s_and_b32 s22, s5, 0x10000
|
|
; SI-NEXT: s_mul_i32 s21, s4, s21
|
|
; SI-NEXT: s_xor_b32 s6, s6, s20
|
|
; SI-NEXT: s_and_b32 s23, s5, 0x20000
|
|
; SI-NEXT: s_mul_i32 s22, s4, s22
|
|
; SI-NEXT: s_xor_b32 s6, s6, s21
|
|
; SI-NEXT: s_and_b32 s24, s5, 0x40000
|
|
; SI-NEXT: s_mul_i32 s23, s4, s23
|
|
; SI-NEXT: s_xor_b32 s6, s6, s22
|
|
; SI-NEXT: s_and_b32 s25, s5, 0x80000
|
|
; SI-NEXT: s_mul_i32 s24, s4, s24
|
|
; SI-NEXT: s_xor_b32 s6, s6, s23
|
|
; SI-NEXT: s_and_b32 s26, s5, 0x100000
|
|
; SI-NEXT: s_mul_i32 s25, s4, s25
|
|
; SI-NEXT: s_xor_b32 s6, s6, s24
|
|
; SI-NEXT: s_and_b32 s27, s5, 0x200000
|
|
; SI-NEXT: s_mul_i32 s26, s4, s26
|
|
; SI-NEXT: s_xor_b32 s6, s6, s25
|
|
; SI-NEXT: s_and_b32 s28, s5, 0x400000
|
|
; SI-NEXT: s_mul_i32 s27, s4, s27
|
|
; SI-NEXT: s_xor_b32 s6, s6, s26
|
|
; SI-NEXT: s_and_b32 s29, s5, 0x800000
|
|
; SI-NEXT: s_mul_i32 s28, s4, s28
|
|
; SI-NEXT: s_xor_b32 s6, s6, s27
|
|
; SI-NEXT: s_and_b32 s30, s5, 0x1000000
|
|
; SI-NEXT: s_mul_i32 s29, s4, s29
|
|
; SI-NEXT: s_xor_b32 s6, s6, s28
|
|
; SI-NEXT: s_and_b32 s31, s5, 0x2000000
|
|
; SI-NEXT: s_mul_i32 s30, s4, s30
|
|
; SI-NEXT: s_xor_b32 s6, s6, s29
|
|
; SI-NEXT: s_and_b32 s33, s5, 0x4000000
|
|
; SI-NEXT: s_mul_i32 s31, s4, s31
|
|
; SI-NEXT: s_xor_b32 s6, s6, s30
|
|
; SI-NEXT: s_and_b32 s34, s5, 0x8000000
|
|
; SI-NEXT: s_mul_i32 s33, s4, s33
|
|
; SI-NEXT: s_xor_b32 s6, s6, s31
|
|
; SI-NEXT: s_and_b32 s35, s5, 0x10000000
|
|
; SI-NEXT: s_mul_i32 s34, s4, s34
|
|
; SI-NEXT: s_xor_b32 s6, s6, s33
|
|
; SI-NEXT: s_and_b32 s36, s5, 0x20000000
|
|
; SI-NEXT: s_mul_i32 s35, s4, s35
|
|
; SI-NEXT: s_xor_b32 s6, s6, s34
|
|
; SI-NEXT: s_and_b32 s37, s5, 2.0
|
|
; SI-NEXT: s_mul_i32 s36, s4, s36
|
|
; SI-NEXT: s_xor_b32 s6, s6, s35
|
|
; SI-NEXT: s_and_b32 s5, s5, 0x80000000
|
|
; SI-NEXT: s_mul_i32 s37, s4, s37
|
|
; SI-NEXT: s_xor_b32 s6, s6, s36
|
|
; SI-NEXT: s_xor_b32 s6, s6, s37
|
|
; SI-NEXT: s_mul_i32 s4, s4, s5
|
|
; SI-NEXT: s_xor_b32 s4, s6, s4
|
|
; SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; VI-LABEL: test_clmul_i32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
|
|
; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; VI-NEXT: s_mov_b32 s2, -1
|
|
; VI-NEXT: s_mov_b32 s10, s2
|
|
; VI-NEXT: s_mov_b32 s11, s3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s8, s6
|
|
; VI-NEXT: s_mov_b32 s9, s7
|
|
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; VI-NEXT: s_mov_b32 s1, s5
|
|
; VI-NEXT: s_mov_b32 s0, s4
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_readfirstlane_b32 s5, v1
|
|
; VI-NEXT: v_readfirstlane_b32 s4, v0
|
|
; VI-NEXT: s_and_b32 s6, s5, 2
|
|
; VI-NEXT: s_and_b32 s7, s5, 1
|
|
; VI-NEXT: s_and_b32 s8, s5, 4
|
|
; VI-NEXT: s_mul_i32 s6, s4, s6
|
|
; VI-NEXT: s_mul_i32 s7, s4, s7
|
|
; VI-NEXT: s_and_b32 s9, s5, 8
|
|
; VI-NEXT: s_mul_i32 s8, s4, s8
|
|
; VI-NEXT: s_xor_b32 s6, s7, s6
|
|
; VI-NEXT: s_and_b32 s10, s5, 16
|
|
; VI-NEXT: s_mul_i32 s9, s4, s9
|
|
; VI-NEXT: s_xor_b32 s6, s6, s8
|
|
; VI-NEXT: s_and_b32 s11, s5, 32
|
|
; VI-NEXT: s_mul_i32 s10, s4, s10
|
|
; VI-NEXT: s_xor_b32 s6, s6, s9
|
|
; VI-NEXT: s_and_b32 s12, s5, 64
|
|
; VI-NEXT: s_mul_i32 s11, s4, s11
|
|
; VI-NEXT: s_xor_b32 s6, s6, s10
|
|
; VI-NEXT: s_and_b32 s13, s5, 0x80
|
|
; VI-NEXT: s_mul_i32 s12, s4, s12
|
|
; VI-NEXT: s_xor_b32 s6, s6, s11
|
|
; VI-NEXT: s_and_b32 s14, s5, 0x100
|
|
; VI-NEXT: s_mul_i32 s13, s4, s13
|
|
; VI-NEXT: s_xor_b32 s6, s6, s12
|
|
; VI-NEXT: s_and_b32 s15, s5, 0x200
|
|
; VI-NEXT: s_mul_i32 s14, s4, s14
|
|
; VI-NEXT: s_xor_b32 s6, s6, s13
|
|
; VI-NEXT: s_and_b32 s16, s5, 0x400
|
|
; VI-NEXT: s_mul_i32 s15, s4, s15
|
|
; VI-NEXT: s_xor_b32 s6, s6, s14
|
|
; VI-NEXT: s_and_b32 s17, s5, 0x800
|
|
; VI-NEXT: s_mul_i32 s16, s4, s16
|
|
; VI-NEXT: s_xor_b32 s6, s6, s15
|
|
; VI-NEXT: s_and_b32 s18, s5, 0x1000
|
|
; VI-NEXT: s_mul_i32 s17, s4, s17
|
|
; VI-NEXT: s_xor_b32 s6, s6, s16
|
|
; VI-NEXT: s_and_b32 s19, s5, 0x2000
|
|
; VI-NEXT: s_mul_i32 s18, s4, s18
|
|
; VI-NEXT: s_xor_b32 s6, s6, s17
|
|
; VI-NEXT: s_and_b32 s20, s5, 0x4000
|
|
; VI-NEXT: s_mul_i32 s19, s4, s19
|
|
; VI-NEXT: s_xor_b32 s6, s6, s18
|
|
; VI-NEXT: s_and_b32 s21, s5, 0x8000
|
|
; VI-NEXT: s_mul_i32 s20, s4, s20
|
|
; VI-NEXT: s_xor_b32 s6, s6, s19
|
|
; VI-NEXT: s_and_b32 s22, s5, 0x10000
|
|
; VI-NEXT: s_mul_i32 s21, s4, s21
|
|
; VI-NEXT: s_xor_b32 s6, s6, s20
|
|
; VI-NEXT: s_and_b32 s23, s5, 0x20000
|
|
; VI-NEXT: s_mul_i32 s22, s4, s22
|
|
; VI-NEXT: s_xor_b32 s6, s6, s21
|
|
; VI-NEXT: s_and_b32 s24, s5, 0x40000
|
|
; VI-NEXT: s_mul_i32 s23, s4, s23
|
|
; VI-NEXT: s_xor_b32 s6, s6, s22
|
|
; VI-NEXT: s_and_b32 s25, s5, 0x80000
|
|
; VI-NEXT: s_mul_i32 s24, s4, s24
|
|
; VI-NEXT: s_xor_b32 s6, s6, s23
|
|
; VI-NEXT: s_and_b32 s26, s5, 0x100000
|
|
; VI-NEXT: s_mul_i32 s25, s4, s25
|
|
; VI-NEXT: s_xor_b32 s6, s6, s24
|
|
; VI-NEXT: s_and_b32 s27, s5, 0x200000
|
|
; VI-NEXT: s_mul_i32 s26, s4, s26
|
|
; VI-NEXT: s_xor_b32 s6, s6, s25
|
|
; VI-NEXT: s_and_b32 s28, s5, 0x400000
|
|
; VI-NEXT: s_mul_i32 s27, s4, s27
|
|
; VI-NEXT: s_xor_b32 s6, s6, s26
|
|
; VI-NEXT: s_and_b32 s29, s5, 0x800000
|
|
; VI-NEXT: s_mul_i32 s28, s4, s28
|
|
; VI-NEXT: s_xor_b32 s6, s6, s27
|
|
; VI-NEXT: s_and_b32 s30, s5, 0x1000000
|
|
; VI-NEXT: s_mul_i32 s29, s4, s29
|
|
; VI-NEXT: s_xor_b32 s6, s6, s28
|
|
; VI-NEXT: s_and_b32 s31, s5, 0x2000000
|
|
; VI-NEXT: s_mul_i32 s30, s4, s30
|
|
; VI-NEXT: s_xor_b32 s6, s6, s29
|
|
; VI-NEXT: s_and_b32 s33, s5, 0x4000000
|
|
; VI-NEXT: s_mul_i32 s31, s4, s31
|
|
; VI-NEXT: s_xor_b32 s6, s6, s30
|
|
; VI-NEXT: s_and_b32 s34, s5, 0x8000000
|
|
; VI-NEXT: s_mul_i32 s33, s4, s33
|
|
; VI-NEXT: s_xor_b32 s6, s6, s31
|
|
; VI-NEXT: s_and_b32 s35, s5, 0x10000000
|
|
; VI-NEXT: s_mul_i32 s34, s4, s34
|
|
; VI-NEXT: s_xor_b32 s6, s6, s33
|
|
; VI-NEXT: s_and_b32 s36, s5, 0x20000000
|
|
; VI-NEXT: s_mul_i32 s35, s4, s35
|
|
; VI-NEXT: s_xor_b32 s6, s6, s34
|
|
; VI-NEXT: s_and_b32 s37, s5, 2.0
|
|
; VI-NEXT: s_mul_i32 s36, s4, s36
|
|
; VI-NEXT: s_xor_b32 s6, s6, s35
|
|
; VI-NEXT: s_and_b32 s5, s5, 0x80000000
|
|
; VI-NEXT: s_mul_i32 s37, s4, s37
|
|
; VI-NEXT: s_xor_b32 s6, s6, s36
|
|
; VI-NEXT: s_xor_b32 s6, s6, s37
|
|
; VI-NEXT: s_mul_i32 s4, s4, s5
|
|
; VI-NEXT: s_xor_b32 s4, s6, s4
|
|
; VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; VI-NEXT: s_endpgm
|
|
;
|
|
; GFX9-LABEL: test_clmul_i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
|
|
; GFX9-NEXT: s_mov_b32 s3, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s2, -1
|
|
; GFX9-NEXT: s_mov_b32 s6, s2
|
|
; GFX9-NEXT: s_mov_b32 s7, s3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s4, s10
|
|
; GFX9-NEXT: s_mov_b32 s5, s11
|
|
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX9-NEXT: s_mov_b32 s0, s8
|
|
; GFX9-NEXT: s_mov_b32 s1, s9
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
|
|
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
|
; GFX9-NEXT: s_and_b32 s6, s5, 2
|
|
; GFX9-NEXT: s_and_b32 s7, s5, 1
|
|
; GFX9-NEXT: s_and_b32 s8, s5, 4
|
|
; GFX9-NEXT: s_mul_i32 s6, s4, s6
|
|
; GFX9-NEXT: s_mul_i32 s7, s4, s7
|
|
; GFX9-NEXT: s_and_b32 s9, s5, 8
|
|
; GFX9-NEXT: s_mul_i32 s8, s4, s8
|
|
; GFX9-NEXT: s_xor_b32 s6, s7, s6
|
|
; GFX9-NEXT: s_and_b32 s10, s5, 16
|
|
; GFX9-NEXT: s_mul_i32 s9, s4, s9
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s8
|
|
; GFX9-NEXT: s_and_b32 s11, s5, 32
|
|
; GFX9-NEXT: s_mul_i32 s10, s4, s10
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s9
|
|
; GFX9-NEXT: s_and_b32 s12, s5, 64
|
|
; GFX9-NEXT: s_mul_i32 s11, s4, s11
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s10
|
|
; GFX9-NEXT: s_and_b32 s13, s5, 0x80
|
|
; GFX9-NEXT: s_mul_i32 s12, s4, s12
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s11
|
|
; GFX9-NEXT: s_and_b32 s14, s5, 0x100
|
|
; GFX9-NEXT: s_mul_i32 s13, s4, s13
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s12
|
|
; GFX9-NEXT: s_and_b32 s15, s5, 0x200
|
|
; GFX9-NEXT: s_mul_i32 s14, s4, s14
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s13
|
|
; GFX9-NEXT: s_and_b32 s16, s5, 0x400
|
|
; GFX9-NEXT: s_mul_i32 s15, s4, s15
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s14
|
|
; GFX9-NEXT: s_and_b32 s17, s5, 0x800
|
|
; GFX9-NEXT: s_mul_i32 s16, s4, s16
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s15
|
|
; GFX9-NEXT: s_and_b32 s18, s5, 0x1000
|
|
; GFX9-NEXT: s_mul_i32 s17, s4, s17
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s16
|
|
; GFX9-NEXT: s_and_b32 s19, s5, 0x2000
|
|
; GFX9-NEXT: s_mul_i32 s18, s4, s18
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s17
|
|
; GFX9-NEXT: s_and_b32 s20, s5, 0x4000
|
|
; GFX9-NEXT: s_mul_i32 s19, s4, s19
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s18
|
|
; GFX9-NEXT: s_and_b32 s21, s5, 0x8000
|
|
; GFX9-NEXT: s_mul_i32 s20, s4, s20
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s19
|
|
; GFX9-NEXT: s_and_b32 s22, s5, 0x10000
|
|
; GFX9-NEXT: s_mul_i32 s21, s4, s21
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s20
|
|
; GFX9-NEXT: s_and_b32 s23, s5, 0x20000
|
|
; GFX9-NEXT: s_mul_i32 s22, s4, s22
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s21
|
|
; GFX9-NEXT: s_and_b32 s24, s5, 0x40000
|
|
; GFX9-NEXT: s_mul_i32 s23, s4, s23
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s22
|
|
; GFX9-NEXT: s_and_b32 s25, s5, 0x80000
|
|
; GFX9-NEXT: s_mul_i32 s24, s4, s24
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s23
|
|
; GFX9-NEXT: s_and_b32 s26, s5, 0x100000
|
|
; GFX9-NEXT: s_mul_i32 s25, s4, s25
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s24
|
|
; GFX9-NEXT: s_and_b32 s27, s5, 0x200000
|
|
; GFX9-NEXT: s_mul_i32 s26, s4, s26
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s25
|
|
; GFX9-NEXT: s_and_b32 s28, s5, 0x400000
|
|
; GFX9-NEXT: s_mul_i32 s27, s4, s27
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s26
|
|
; GFX9-NEXT: s_and_b32 s29, s5, 0x800000
|
|
; GFX9-NEXT: s_mul_i32 s28, s4, s28
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s27
|
|
; GFX9-NEXT: s_and_b32 s30, s5, 0x1000000
|
|
; GFX9-NEXT: s_mul_i32 s29, s4, s29
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s28
|
|
; GFX9-NEXT: s_and_b32 s31, s5, 0x2000000
|
|
; GFX9-NEXT: s_mul_i32 s30, s4, s30
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s29
|
|
; GFX9-NEXT: s_and_b32 s33, s5, 0x4000000
|
|
; GFX9-NEXT: s_mul_i32 s31, s4, s31
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s30
|
|
; GFX9-NEXT: s_and_b32 s34, s5, 0x8000000
|
|
; GFX9-NEXT: s_mul_i32 s33, s4, s33
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s31
|
|
; GFX9-NEXT: s_and_b32 s35, s5, 0x10000000
|
|
; GFX9-NEXT: s_mul_i32 s34, s4, s34
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s33
|
|
; GFX9-NEXT: s_and_b32 s36, s5, 0x20000000
|
|
; GFX9-NEXT: s_mul_i32 s35, s4, s35
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s34
|
|
; GFX9-NEXT: s_and_b32 s37, s5, 2.0
|
|
; GFX9-NEXT: s_mul_i32 s36, s4, s36
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s35
|
|
; GFX9-NEXT: s_and_b32 s5, s5, 0x80000000
|
|
; GFX9-NEXT: s_mul_i32 s37, s4, s37
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s36
|
|
; GFX9-NEXT: s_xor_b32 s6, s6, s37
|
|
; GFX9-NEXT: s_mul_i32 s4, s4, s5
|
|
; GFX9-NEXT: s_xor_b32 s4, s6, s4
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GFX9-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: test_clmul_i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-NEXT: s_mov_b32 s6, -1
|
|
; GFX10-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX10-NEXT: s_mov_b32 s10, s6
|
|
; GFX10-NEXT: s_mov_b32 s11, s7
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_mov_b32 s8, s2
|
|
; GFX10-NEXT: s_mov_b32 s9, s3
|
|
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_readfirstlane_b32 s2, v1
|
|
; GFX10-NEXT: v_readfirstlane_b32 s3, v0
|
|
; GFX10-NEXT: s_and_b32 s4, s2, 2
|
|
; GFX10-NEXT: s_and_b32 s5, s2, 1
|
|
; GFX10-NEXT: s_and_b32 s8, s2, 4
|
|
; GFX10-NEXT: s_mul_i32 s4, s3, s4
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s5
|
|
; GFX10-NEXT: s_and_b32 s9, s2, 8
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s8
|
|
; GFX10-NEXT: s_xor_b32 s4, s5, s4
|
|
; GFX10-NEXT: s_and_b32 s10, s2, 16
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s9
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s11, s2, 32
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s10
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s12, s2, 64
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s11
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s13, s2, 0x80
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s12
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s14, s2, 0x100
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s13
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s15, s2, 0x200
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s14
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s16, s2, 0x400
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s15
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s17, s2, 0x800
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s16
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s18, s2, 0x1000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s17
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s19, s2, 0x2000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s18
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s20, s2, 0x4000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s19
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s21, s2, 0x8000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s20
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s22, s2, 0x10000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s21
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s23, s2, 0x20000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s22
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s24, s2, 0x40000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s23
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s25, s2, 0x80000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s24
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s26, s2, 0x100000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s25
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s27, s2, 0x200000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s26
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s28, s2, 0x400000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s27
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s29, s2, 0x800000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s28
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s30, s2, 0x1000000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s29
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s31, s2, 0x2000000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s30
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s33, s2, 0x4000000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s31
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s34, s2, 0x8000000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s33
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s35, s2, 0x10000000
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s34
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_and_b32 s36, s2, 0x20000000
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s35
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s37, s2, 2.0
|
|
; GFX10-NEXT: s_mul_i32 s5, s3, s36
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_mul_i32 s8, s3, s37
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX10-NEXT: s_and_b32 s2, s2, 0x80000000
|
|
; GFX10-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX10-NEXT: s_mul_i32 s3, s3, s2
|
|
; GFX10-NEXT: s_mov_b32 s5, s1
|
|
; GFX10-NEXT: s_xor_b32 s2, s4, s3
|
|
; GFX10-NEXT: s_mov_b32 s4, s0
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX10-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_clmul_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-NEXT: s_mov_b32 s6, -1
|
|
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s10, s6
|
|
; GFX11-NEXT: s_mov_b32 s11, s7
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s8, s2
|
|
; GFX11-NEXT: s_mov_b32 s9, s3
|
|
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_readfirstlane_b32 s2, v1
|
|
; GFX11-NEXT: v_readfirstlane_b32 s3, v0
|
|
; GFX11-NEXT: s_and_b32 s4, s2, 2
|
|
; GFX11-NEXT: s_and_b32 s5, s2, 1
|
|
; GFX11-NEXT: s_and_b32 s8, s2, 4
|
|
; GFX11-NEXT: s_mul_i32 s4, s3, s4
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s5
|
|
; GFX11-NEXT: s_and_b32 s9, s2, 8
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s8
|
|
; GFX11-NEXT: s_xor_b32 s4, s5, s4
|
|
; GFX11-NEXT: s_and_b32 s10, s2, 16
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s9
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s11, s2, 32
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s10
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s12, s2, 64
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s11
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s13, s2, 0x80
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s12
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s14, s2, 0x100
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s13
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s15, s2, 0x200
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s14
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s16, s2, 0x400
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s15
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s17, s2, 0x800
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s16
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s18, s2, 0x1000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s17
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s19, s2, 0x2000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s18
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s20, s2, 0x4000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s19
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s21, s2, 0x8000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s20
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s22, s2, 0x10000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s21
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s23, s2, 0x20000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s22
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s24, s2, 0x40000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s23
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s25, s2, 0x80000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s24
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s26, s2, 0x100000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s25
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s27, s2, 0x200000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s26
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s28, s2, 0x400000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s27
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s29, s2, 0x800000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s28
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s30, s2, 0x1000000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s29
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s31, s2, 0x2000000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s30
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s33, s2, 0x4000000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s31
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s34, s2, 0x8000000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s33
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s35, s2, 0x10000000
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s34
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_and_b32 s36, s2, 0x20000000
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s35
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s37, s2, 2.0
|
|
; GFX11-NEXT: s_mul_i32 s5, s3, s36
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_mul_i32 s8, s3, s37
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX11-NEXT: s_and_b32 s2, s2, 0x80000000
|
|
; GFX11-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX11-NEXT: s_mul_i32 s3, s3, s2
|
|
; GFX11-NEXT: s_mov_b32 s5, s1
|
|
; GFX11-NEXT: s_xor_b32 s2, s4, s3
|
|
; GFX11-NEXT: s_mov_b32 s4, s0
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: test_clmul_i32:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-NEXT: s_mov_b32 s6, -1
|
|
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX12-NEXT: s_mov_b32 s10, s6
|
|
; GFX12-NEXT: s_mov_b32 s11, s7
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_mov_b32 s8, s2
|
|
; GFX12-NEXT: s_mov_b32 s9, s3
|
|
; GFX12-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: v_readfirstlane_b32 s2, v1
|
|
; GFX12-NEXT: v_readfirstlane_b32 s3, v0
|
|
; GFX12-NEXT: s_and_b32 s4, s2, 2
|
|
; GFX12-NEXT: s_and_b32 s5, s2, 1
|
|
; GFX12-NEXT: s_and_b32 s8, s2, 4
|
|
; GFX12-NEXT: s_mul_i32 s4, s3, s4
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s5
|
|
; GFX12-NEXT: s_and_b32 s9, s2, 8
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s8
|
|
; GFX12-NEXT: s_xor_b32 s4, s5, s4
|
|
; GFX12-NEXT: s_and_b32 s10, s2, 16
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s9
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s11, s2, 32
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s10
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s12, s2, 64
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s11
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s13, s2, 0x80
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s12
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s14, s2, 0x100
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s13
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s15, s2, 0x200
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s14
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s16, s2, 0x400
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s15
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s17, s2, 0x800
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s16
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s18, s2, 0x1000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s17
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s19, s2, 0x2000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s18
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s20, s2, 0x4000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s19
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s21, s2, 0x8000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s20
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s22, s2, 0x10000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s21
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s23, s2, 0x20000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s22
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s24, s2, 0x40000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s23
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s25, s2, 0x80000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s24
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s26, s2, 0x100000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s25
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s27, s2, 0x200000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s26
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s28, s2, 0x400000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s27
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s29, s2, 0x800000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s28
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s30, s2, 0x1000000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s29
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s31, s2, 0x2000000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s30
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s33, s2, 0x4000000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s31
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s34, s2, 0x8000000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s33
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s35, s2, 0x10000000
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s34
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_and_b32 s36, s2, 0x20000000
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s35
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s37, s2, 2.0
|
|
; GFX12-NEXT: s_mul_i32 s5, s3, s36
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_mul_i32 s8, s3, s37
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX12-NEXT: s_and_b32 s2, s2, 0x80000000
|
|
; GFX12-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX12-NEXT: s_mul_i32 s3, s3, s2
|
|
; GFX12-NEXT: s_mov_b32 s5, s1
|
|
; GFX12-NEXT: s_xor_b32 s2, s4, s3
|
|
; GFX12-NEXT: s_mov_b32 s4, s0
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null
|
|
; GFX12-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-LABEL: test_clmul_i32:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
|
; GFX1250-NEXT: s_mov_b32 s6, -1
|
|
; GFX1250-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX1250-NEXT: s_mov_b32 s10, s6
|
|
; GFX1250-NEXT: s_mov_b32 s11, s7
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_mov_b32 s8, s2
|
|
; GFX1250-NEXT: s_mov_b32 s9, s3
|
|
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
|
|
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
|
; GFX1250-NEXT: v_readfirstlane_b32 s2, v1
|
|
; GFX1250-NEXT: v_readfirstlane_b32 s3, v0
|
|
; GFX1250-NEXT: s_and_b32 s4, s2, 2
|
|
; GFX1250-NEXT: s_and_b32 s5, s2, 1
|
|
; GFX1250-NEXT: s_and_b32 s8, s2, 4
|
|
; GFX1250-NEXT: s_mul_i32 s4, s3, s4
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s5
|
|
; GFX1250-NEXT: s_and_b32 s9, s2, 8
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s8
|
|
; GFX1250-NEXT: s_xor_b32 s4, s5, s4
|
|
; GFX1250-NEXT: s_and_b32 s10, s2, 16
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s9
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s11, s2, 32
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s10
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s12, s2, 64
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s11
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s13, s2, 0x80
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s12
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s14, s2, 0x100
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s13
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s15, s2, 0x200
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s14
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s16, s2, 0x400
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s15
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s17, s2, 0x800
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s16
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s18, s2, 0x1000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s17
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s19, s2, 0x2000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s18
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s20, s2, 0x4000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s19
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s21, s2, 0x8000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s20
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s22, s2, 0x10000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s21
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s23, s2, 0x20000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s22
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s24, s2, 0x40000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s23
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s25, s2, 0x80000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s24
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s26, s2, 0x100000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s25
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s27, s2, 0x200000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s26
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s28, s2, 0x400000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s27
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s29, s2, 0x800000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s28
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s30, s2, 0x1000000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s29
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s31, s2, 0x2000000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s30
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s33, s2, 0x4000000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s31
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s34, s2, 0x8000000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s33
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s35, s2, 0x10000000
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s34
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_and_b32 s36, s2, 0x20000000
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s35
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s37, s2, 2.0
|
|
; GFX1250-NEXT: s_mul_i32 s5, s3, s36
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_mul_i32 s8, s3, s37
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s5
|
|
; GFX1250-NEXT: s_and_b32 s2, s2, 0x80000000
|
|
; GFX1250-NEXT: s_xor_b32 s4, s4, s8
|
|
; GFX1250-NEXT: s_mul_i32 s3, s3, s2
|
|
; GFX1250-NEXT: s_mov_b32 s5, s1
|
|
; GFX1250-NEXT: s_xor_b32 s2, s4, s3
|
|
; GFX1250-NEXT: s_mov_b32 s4, s0
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: test_clmul_i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @6
|
|
; EG-NEXT: ALU 116, @9, KC0[], KC1[]
|
|
; EG-NEXT: ALU 10, @126, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: Fetch clause starting at 6:
|
|
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 8:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 9:
|
|
; EG-NEXT: AND_INT T0.W, T0.Y, 1,
|
|
; EG-NEXT: AND_INT * T1.W, T0.Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.W,
|
|
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
|
; EG-NEXT: MULLO_INT * T1.X, T0.X, T1.W,
|
|
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T1.W, T0.Z, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.W,
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 256(3.587324e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 512(7.174648e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 1024(1.434930e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 2048(2.869859e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 4096(5.739719e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 8192(1.147944e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 16384(2.295887e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 32768(4.591775e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 65536(9.183550e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 131072(1.836710e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 262144(3.673420e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 524288(7.346840e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 1048576(1.469368e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 2097152(2.938736e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 4194304(5.877472e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 16777216(2.350989e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 33554432(9.403955e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 67108864(1.504633e-36), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 134217728(3.851860e-34), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 268435456(2.524355e-29), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, PV.Z,
|
|
; EG-NEXT: 536870912(1.084202e-19), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T2.Z, T0.Y, literal.x,
|
|
; EG-NEXT: 1073741824(2.000000e+00), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 126:
|
|
; EG-NEXT: XOR_INT T0.W, T0.W, T0.Z, BS:VEC_021/SCL_122
|
|
; EG-NEXT: MULLO_INT * T0.Z, T0.X, T1.Z,
|
|
; EG-NEXT: AND_INT T1.Z, T0.Y, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.Y, T0.X, T2.Z,
|
|
; EG-NEXT: -2147483648(-0.000000e+00), 0(0.000000e+00)
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.X, PV.Z,
|
|
; EG-NEXT: XOR_INT T0.X, PV.W, PS,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
|
|
%a = load i32, ptr addrspace(1) %in
|
|
%b = load i32, ptr addrspace(1) %b_ptr
|
|
%res = call i32 @llvm.clmul.i32(i32 %a, i32 %b)
|
|
store i32 %res, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @test_clmulr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
|
|
; SI-LABEL: test_clmulr_i32:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: s_mov_b32 s10, s2
|
|
; SI-NEXT: s_mov_b32 s11, s3
|
|
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SI-NEXT: s_mov_b32 s8, s6
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; SI-NEXT: s_mov_b32 s7, 0
|
|
; SI-NEXT: s_mov_b32 s21, s7
|
|
; SI-NEXT: s_mov_b32 s0, s4
|
|
; SI-NEXT: s_mov_b32 s1, s5
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: s_mov_b32 s15, s7
|
|
; SI-NEXT: s_mov_b32 s11, s7
|
|
; SI-NEXT: s_mov_b32 s13, s7
|
|
; SI-NEXT: s_mov_b32 s17, s7
|
|
; SI-NEXT: s_mov_b32 s19, s7
|
|
; SI-NEXT: s_mov_b32 s23, s7
|
|
; SI-NEXT: s_mov_b32 s25, s7
|
|
; SI-NEXT: s_mov_b32 s27, s7
|
|
; SI-NEXT: s_mov_b32 s29, s7
|
|
; SI-NEXT: s_mov_b32 s31, s7
|
|
; SI-NEXT: s_mov_b32 s35, s7
|
|
; SI-NEXT: s_mov_b32 s37, s7
|
|
; SI-NEXT: s_mov_b32 s39, s7
|
|
; SI-NEXT: s_mov_b32 s41, s7
|
|
; SI-NEXT: s_mov_b32 s43, s7
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: v_readfirstlane_b32 s33, v1
|
|
; SI-NEXT: s_and_b32 s20, s33, 2
|
|
; SI-NEXT: v_readfirstlane_b32 s6, v0
|
|
; SI-NEXT: s_bfe_i32 s8, s33, 0x10000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[20:21], s[20:21], 0
|
|
; SI-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
|
|
; SI-NEXT: s_and_b32 s8, s8, s6
|
|
; SI-NEXT: s_and_b64 s[20:21], s[20:21], exec
|
|
; SI-NEXT: s_cselect_b32 s21, 0, s5
|
|
; SI-NEXT: s_cselect_b32 s20, 0, s4
|
|
; SI-NEXT: s_and_b32 s14, s33, 4
|
|
; SI-NEXT: s_xor_b64 s[20:21], s[8:9], s[20:21]
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 2
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s15, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s14, 0, s14
|
|
; SI-NEXT: s_and_b32 s10, s33, 8
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0
|
|
; SI-NEXT: s_xor_b64 s[14:15], s[20:21], s[14:15]
|
|
; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 3
|
|
; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec
|
|
; SI-NEXT: s_cselect_b32 s11, 0, s21
|
|
; SI-NEXT: s_cselect_b32 s10, 0, s20
|
|
; SI-NEXT: s_and_b32 s12, s33, 16
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[12:13], s[12:13], 0
|
|
; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 4
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[14:15], s[10:11]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[12:13], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s21
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s20
|
|
; SI-NEXT: s_and_b32 s16, s33, 32
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[16:17], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 5
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s18, s33, 64
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[18:19], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 6
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s22, s33, 0x80
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[22:23], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 7
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s24, s33, 0x100
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[24:25], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 8
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s26, s33, 0x200
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[26:27], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 9
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s28, s33, 0x400
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[28:29], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 10
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s30, s33, 0x800
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[30:31], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 11
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s34, s33, 0x1000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[34:35], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 12
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s36, s33, 0x2000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[36:37], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 13
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s38, s33, 0x4000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[38:39], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 14
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s40, s33, 0x8000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[40:41], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 15
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s42, s33, 0x10000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[42:43], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 16
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_mov_b32 s5, s7
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s4, s33, 0x20000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 17
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: s_cselect_b32 s5, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s4, 0, s14
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x40000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[10:11], s[4:5]
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s13
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s12
|
|
; SI-NEXT: s_and_b32 s10, s33, 0x80000
|
|
; SI-NEXT: s_mov_b32 s11, s7
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[10:11], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 19
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x100000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x200000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x400000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x800000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x1000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x2000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x4000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x8000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x10000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x20000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 2.0
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31
|
|
; SI-NEXT: s_cmp_gt_i32 s33, -1
|
|
; SI-NEXT: s_cselect_b32 s7, 0, s7
|
|
; SI-NEXT: s_cselect_b32 s6, 0, s6
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
|
|
; SI-NEXT: s_lshr_b64 s[4:5], s[4:5], 31
|
|
; SI-NEXT: v_mov_b32_e32 v0, s4
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; VI-LABEL: test_clmulr_i32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
|
|
; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; VI-NEXT: s_mov_b32 s2, -1
|
|
; VI-NEXT: s_mov_b32 s10, s2
|
|
; VI-NEXT: s_mov_b32 s11, s3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s8, s6
|
|
; VI-NEXT: s_mov_b32 s9, s7
|
|
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; VI-NEXT: s_mov_b32 s0, s4
|
|
; VI-NEXT: s_mov_b32 s7, 0
|
|
; VI-NEXT: s_mov_b32 s1, s5
|
|
; VI-NEXT: s_mov_b32 s11, s7
|
|
; VI-NEXT: s_mov_b32 s9, s7
|
|
; VI-NEXT: s_mov_b32 s13, s7
|
|
; VI-NEXT: s_mov_b32 s15, s7
|
|
; VI-NEXT: s_mov_b32 s17, s7
|
|
; VI-NEXT: s_mov_b32 s19, s7
|
|
; VI-NEXT: s_mov_b32 s21, s7
|
|
; VI-NEXT: s_mov_b32 s23, s7
|
|
; VI-NEXT: s_mov_b32 s25, s7
|
|
; VI-NEXT: s_mov_b32 s27, s7
|
|
; VI-NEXT: s_mov_b32 s29, s7
|
|
; VI-NEXT: s_mov_b32 s31, s7
|
|
; VI-NEXT: s_mov_b32 s35, s7
|
|
; VI-NEXT: s_mov_b32 s37, s7
|
|
; VI-NEXT: s_mov_b32 s39, s7
|
|
; VI-NEXT: s_mov_b32 s41, s7
|
|
; VI-NEXT: s_mov_b32 s43, s7
|
|
; VI-NEXT: s_mov_b32 s45, s7
|
|
; VI-NEXT: s_mov_b32 s47, s7
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_readfirstlane_b32 s4, v1
|
|
; VI-NEXT: v_readfirstlane_b32 s6, v0
|
|
; VI-NEXT: s_bfe_i32 s5, s4, 0x10000
|
|
; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 1
|
|
; VI-NEXT: s_and_b32 s10, s4, 2
|
|
; VI-NEXT: s_and_b32 s8, s5, s6
|
|
; VI-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s49
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s48
|
|
; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 2
|
|
; VI-NEXT: s_and_b32 s12, s4, 4
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s49
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s48
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 3
|
|
; VI-NEXT: s_and_b32 s14, s4, 8
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[14:15], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 4
|
|
; VI-NEXT: s_and_b32 s16, s4, 16
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[16:17], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 5
|
|
; VI-NEXT: s_and_b32 s18, s4, 32
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[18:19], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 6
|
|
; VI-NEXT: s_and_b32 s20, s4, 64
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[20:21], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 7
|
|
; VI-NEXT: s_and_b32 s22, s4, 0x80
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[22:23], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 8
|
|
; VI-NEXT: s_and_b32 s24, s4, 0x100
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[24:25], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 9
|
|
; VI-NEXT: s_and_b32 s26, s4, 0x200
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[26:27], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 10
|
|
; VI-NEXT: s_and_b32 s28, s4, 0x400
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[28:29], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 11
|
|
; VI-NEXT: s_and_b32 s30, s4, 0x800
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[30:31], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 12
|
|
; VI-NEXT: s_and_b32 s34, s4, 0x1000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[34:35], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 13
|
|
; VI-NEXT: s_and_b32 s36, s4, 0x2000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[36:37], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 14
|
|
; VI-NEXT: s_and_b32 s38, s4, 0x4000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[38:39], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 15
|
|
; VI-NEXT: s_and_b32 s40, s4, 0x8000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[40:41], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 16
|
|
; VI-NEXT: s_and_b32 s42, s4, 0x10000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[42:43], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 17
|
|
; VI-NEXT: s_and_b32 s44, s4, 0x20000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[44:45], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18
|
|
; VI-NEXT: s_and_b32 s46, s4, 0x40000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[46:47], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 19
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_and_b32 s10, s4, 0x80000
|
|
; VI-NEXT: s_mov_b32 s11, s7
|
|
; VI-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x100000
|
|
; VI-NEXT: s_mov_b32 s13, s7
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x200000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x400000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x800000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x1000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x2000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x4000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x8000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x10000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x20000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30
|
|
; VI-NEXT: s_and_b32 s12, s4, 2.0
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31
|
|
; VI-NEXT: s_cmp_gt_i32 s4, -1
|
|
; VI-NEXT: s_cselect_b32 s5, 0, s7
|
|
; VI-NEXT: s_cselect_b32 s4, 0, s6
|
|
; VI-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
|
; VI-NEXT: s_lshr_b64 s[4:5], s[4:5], 31
|
|
; VI-NEXT: v_mov_b32_e32 v0, s4
|
|
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; VI-NEXT: s_endpgm
|
|
;
|
|
; GFX9-LABEL: test_clmulr_i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
|
|
; GFX9-NEXT: s_mov_b32 s3, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s2, -1
|
|
; GFX9-NEXT: s_mov_b32 s6, s2
|
|
; GFX9-NEXT: s_mov_b32 s7, s3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s4, s10
|
|
; GFX9-NEXT: s_mov_b32 s5, s11
|
|
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX9-NEXT: s_mov_b32 s0, s8
|
|
; GFX9-NEXT: s_mov_b32 s5, 0
|
|
; GFX9-NEXT: s_mov_b32 s11, s5
|
|
; GFX9-NEXT: s_mov_b32 s7, s5
|
|
; GFX9-NEXT: s_mov_b32 s13, s5
|
|
; GFX9-NEXT: s_mov_b32 s15, s5
|
|
; GFX9-NEXT: s_mov_b32 s17, s5
|
|
; GFX9-NEXT: s_mov_b32 s19, s5
|
|
; GFX9-NEXT: s_mov_b32 s21, s5
|
|
; GFX9-NEXT: s_mov_b32 s23, s5
|
|
; GFX9-NEXT: s_mov_b32 s25, s5
|
|
; GFX9-NEXT: s_mov_b32 s27, s5
|
|
; GFX9-NEXT: s_mov_b32 s29, s5
|
|
; GFX9-NEXT: s_mov_b32 s31, s5
|
|
; GFX9-NEXT: s_mov_b32 s35, s5
|
|
; GFX9-NEXT: s_mov_b32 s37, s5
|
|
; GFX9-NEXT: s_mov_b32 s39, s5
|
|
; GFX9-NEXT: s_mov_b32 s41, s5
|
|
; GFX9-NEXT: s_mov_b32 s43, s5
|
|
; GFX9-NEXT: s_mov_b32 s45, s5
|
|
; GFX9-NEXT: s_mov_b32 s47, s5
|
|
; GFX9-NEXT: s_mov_b32 s1, s9
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_readfirstlane_b32 s8, v1
|
|
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
|
; GFX9-NEXT: s_bfe_i32 s6, s8, 0x10000
|
|
; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 1
|
|
; GFX9-NEXT: s_and_b32 s10, s8, 2
|
|
; GFX9-NEXT: s_and_b32 s6, s6, s4
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s49
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s48
|
|
; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 2
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 4
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s49
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s48
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 3
|
|
; GFX9-NEXT: s_and_b32 s14, s8, 8
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[14:15], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 4
|
|
; GFX9-NEXT: s_and_b32 s16, s8, 16
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[16:17], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 5
|
|
; GFX9-NEXT: s_and_b32 s18, s8, 32
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[18:19], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 6
|
|
; GFX9-NEXT: s_and_b32 s20, s8, 64
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[20:21], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 7
|
|
; GFX9-NEXT: s_and_b32 s22, s8, 0x80
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[22:23], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 8
|
|
; GFX9-NEXT: s_and_b32 s24, s8, 0x100
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[24:25], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 9
|
|
; GFX9-NEXT: s_and_b32 s26, s8, 0x200
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[26:27], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 10
|
|
; GFX9-NEXT: s_and_b32 s28, s8, 0x400
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[28:29], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 11
|
|
; GFX9-NEXT: s_and_b32 s30, s8, 0x800
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[30:31], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 12
|
|
; GFX9-NEXT: s_and_b32 s34, s8, 0x1000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[34:35], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 13
|
|
; GFX9-NEXT: s_and_b32 s36, s8, 0x2000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[36:37], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 14
|
|
; GFX9-NEXT: s_and_b32 s38, s8, 0x4000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[38:39], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 15
|
|
; GFX9-NEXT: s_and_b32 s40, s8, 0x8000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[40:41], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 16
|
|
; GFX9-NEXT: s_and_b32 s42, s8, 0x10000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[42:43], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 17
|
|
; GFX9-NEXT: s_and_b32 s44, s8, 0x20000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[44:45], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 18
|
|
; GFX9-NEXT: s_and_b32 s46, s8, 0x40000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[46:47], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 19
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_and_b32 s10, s8, 0x80000
|
|
; GFX9-NEXT: s_mov_b32 s11, s5
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 20
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x100000
|
|
; GFX9-NEXT: s_mov_b32 s13, s5
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 21
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x200000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 22
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x400000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 23
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x800000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 24
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x1000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 25
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x2000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 26
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x4000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 27
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x8000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 28
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x10000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 29
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x20000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 30
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 2.0
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], 31
|
|
; GFX9-NEXT: s_cmp_gt_i32 s8, -1
|
|
; GFX9-NEXT: s_cselect_b32 s5, 0, s5
|
|
; GFX9-NEXT: s_cselect_b32 s4, 0, s4
|
|
; GFX9-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
|
|
; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], 31
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s4
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GFX9-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: test_clmulr_i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-NEXT: s_mov_b32 s6, -1
|
|
; GFX10-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX10-NEXT: s_mov_b32 s10, s6
|
|
; GFX10-NEXT: s_mov_b32 s11, s7
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_mov_b32 s8, s2
|
|
; GFX10-NEXT: s_mov_b32 s9, s3
|
|
; GFX10-NEXT: s_mov_b32 s3, 0
|
|
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
|
|
; GFX10-NEXT: s_mov_b32 s11, s3
|
|
; GFX10-NEXT: s_mov_b32 s9, s3
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_readfirstlane_b32 s4, v1
|
|
; GFX10-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX10-NEXT: s_bfe_i32 s5, s4, 0x10000
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 2
|
|
; GFX10-NEXT: s_lshl_b64 s[12:13], s[2:3], 1
|
|
; GFX10-NEXT: s_and_b32 s8, s5, s2
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_mov_b32 s5, s1
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s13
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s12
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 4
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 2
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 8
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 3
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 16
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 4
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 32
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 5
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 64
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 6
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x80
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 7
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x100
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 8
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x200
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 9
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x400
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 10
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x800
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 11
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x1000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 12
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x2000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 13
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x4000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 14
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x8000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 15
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x10000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 16
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x20000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 17
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x40000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 18
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x80000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 19
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x100000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 20
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x200000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 21
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x400000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 22
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x800000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 23
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x1000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 24
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x2000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 25
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x4000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 26
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x8000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 27
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x10000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 28
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x20000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 29
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 2.0
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 30
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s11, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s10, 0, s14
|
|
; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 31
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; GFX10-NEXT: s_cmp_gt_i32 s4, -1
|
|
; GFX10-NEXT: s_mov_b32 s4, s0
|
|
; GFX10-NEXT: s_cselect_b32 s3, 0, s3
|
|
; GFX10-NEXT: s_cselect_b32 s2, 0, s2
|
|
; GFX10-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3]
|
|
; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 31
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX10-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_clmulr_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-NEXT: s_mov_b32 s6, -1
|
|
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s10, s6
|
|
; GFX11-NEXT: s_mov_b32 s11, s7
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s8, s2
|
|
; GFX11-NEXT: s_mov_b32 s9, s3
|
|
; GFX11-NEXT: s_mov_b32 s3, 0
|
|
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
|
|
; GFX11-NEXT: s_mov_b32 s11, s3
|
|
; GFX11-NEXT: s_mov_b32 s9, s3
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_readfirstlane_b32 s4, v1
|
|
; GFX11-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX11-NEXT: s_bfe_i32 s5, s4, 0x10000
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 2
|
|
; GFX11-NEXT: s_lshl_b64 s[12:13], s[2:3], 1
|
|
; GFX11-NEXT: s_and_b32 s8, s5, s2
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_mov_b32 s5, s1
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s13
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s12
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 4
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 2
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 8
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 3
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 16
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 4
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 32
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 5
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 64
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 6
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x80
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 7
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x100
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 8
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x200
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 9
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x400
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 10
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x800
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 11
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x1000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 12
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x2000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 13
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x4000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 14
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x8000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 15
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x10000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 16
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x20000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 17
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x40000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 18
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x80000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 19
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x100000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 20
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x200000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 21
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x400000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 22
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x800000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 23
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x1000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 24
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x2000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 25
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x4000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 26
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x8000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 27
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x10000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 28
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x20000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 29
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 2.0
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 30
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s11, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s10, 0, s14
|
|
; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 31
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; GFX11-NEXT: s_cmp_gt_i32 s4, -1
|
|
; GFX11-NEXT: s_mov_b32 s4, s0
|
|
; GFX11-NEXT: s_cselect_b32 s3, 0, s3
|
|
; GFX11-NEXT: s_cselect_b32 s2, 0, s2
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3]
|
|
; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 31
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: test_clmulr_i32:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-NEXT: s_mov_b32 s6, -1
|
|
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX12-NEXT: s_mov_b32 s10, s6
|
|
; GFX12-NEXT: s_mov_b32 s11, s7
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_mov_b32 s8, s2
|
|
; GFX12-NEXT: s_mov_b32 s9, s3
|
|
; GFX12-NEXT: s_mov_b32 s3, 0
|
|
; GFX12-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
|
|
; GFX12-NEXT: s_mov_b32 s5, s3
|
|
; GFX12-NEXT: s_mov_b32 s9, s3
|
|
; GFX12-NEXT: s_mov_b32 s11, s3
|
|
; GFX12-NEXT: s_mov_b32 s13, s3
|
|
; GFX12-NEXT: s_mov_b32 s15, s3
|
|
; GFX12-NEXT: s_mov_b32 s17, s3
|
|
; GFX12-NEXT: s_mov_b32 s19, s3
|
|
; GFX12-NEXT: s_mov_b32 s21, s3
|
|
; GFX12-NEXT: s_mov_b32 s23, s3
|
|
; GFX12-NEXT: s_mov_b32 s25, s3
|
|
; GFX12-NEXT: s_mov_b32 s27, s3
|
|
; GFX12-NEXT: s_mov_b32 s29, s3
|
|
; GFX12-NEXT: s_mov_b32 s31, s3
|
|
; GFX12-NEXT: s_mov_b32 s35, s3
|
|
; GFX12-NEXT: s_mov_b32 s37, s3
|
|
; GFX12-NEXT: s_mov_b32 s39, s3
|
|
; GFX12-NEXT: s_mov_b32 s41, s3
|
|
; GFX12-NEXT: s_mov_b32 s43, s3
|
|
; GFX12-NEXT: s_mov_b32 s45, s3
|
|
; GFX12-NEXT: s_mov_b32 s47, s3
|
|
; GFX12-NEXT: s_mov_b32 s49, s3
|
|
; GFX12-NEXT: s_mov_b32 s51, s3
|
|
; GFX12-NEXT: s_mov_b32 s53, s3
|
|
; GFX12-NEXT: s_mov_b32 s55, s3
|
|
; GFX12-NEXT: s_mov_b32 s57, s3
|
|
; GFX12-NEXT: s_mov_b32 s59, s3
|
|
; GFX12-NEXT: s_mov_b32 s61, s3
|
|
; GFX12-NEXT: s_mov_b32 s63, s3
|
|
; GFX12-NEXT: s_mov_b32 s65, s3
|
|
; GFX12-NEXT: s_mov_b32 s67, s3
|
|
; GFX12-NEXT: s_mov_b32 s69, s3
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: v_readfirstlane_b32 s33, v1
|
|
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX12-NEXT: s_and_b32 s4, s33, 2
|
|
; GFX12-NEXT: s_and_b32 s8, s33, 1
|
|
; GFX12-NEXT: s_and_b32 s10, s33, 4
|
|
; GFX12-NEXT: s_mul_u64 s[4:5], s[2:3], s[4:5]
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s12, s33, 8
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
|
; GFX12-NEXT: s_and_b32 s14, s33, 16
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s16, s33, 32
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s18, s33, 64
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[16:17]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s20, s33, 0x80
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[18:19]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s22, s33, 0x100
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s24, s33, 0x200
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s26, s33, 0x400
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[24:25]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s28, s33, 0x800
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[26:27]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s30, s33, 0x1000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s34, s33, 0x2000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s36, s33, 0x4000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[34:35]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s38, s33, 0x8000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[36:37]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s40, s33, 0x10000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s42, s33, 0x20000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s44, s33, 0x40000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[42:43]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s46, s33, 0x80000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[44:45]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s48, s33, 0x100000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s50, s33, 0x200000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s52, s33, 0x400000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[50:51]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s54, s33, 0x800000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[52:53]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s56, s33, 0x1000000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s58, s33, 0x2000000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s60, s33, 0x4000000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[58:59]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s62, s33, 0x8000000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[60:61]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s64, s33, 0x10000000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s66, s33, 0x20000000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s68, s33, 2.0
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[66:67]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[68:69]
|
|
; GFX12-NEXT: s_and_b32 s12, s33, 0x80000000
|
|
; GFX12-NEXT: s_mov_b32 s13, s3
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], 0
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_mov_b32 s4, s0
|
|
; GFX12-NEXT: s_lshr_b64 s[2:3], s[2:3], 31
|
|
; GFX12-NEXT: s_mov_b32 s5, s1
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null
|
|
; GFX12-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-LABEL: test_clmulr_i32:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
|
; GFX1250-NEXT: s_mov_b32 s6, -1
|
|
; GFX1250-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX1250-NEXT: s_mov_b32 s10, s6
|
|
; GFX1250-NEXT: s_mov_b32 s11, s7
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_mov_b32 s8, s2
|
|
; GFX1250-NEXT: s_mov_b32 s9, s3
|
|
; GFX1250-NEXT: s_mov_b32 s3, 0
|
|
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
|
|
; GFX1250-NEXT: s_mov_b32 s5, s3
|
|
; GFX1250-NEXT: s_wait_xcnt 0x0
|
|
; GFX1250-NEXT: s_mov_b64 s[8:9], 0x80000000
|
|
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
|
; GFX1250-NEXT: v_readfirstlane_b32 s4, v1
|
|
; GFX1250-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX1250-NEXT: s_and_b64 s[10:11], s[4:5], 2
|
|
; GFX1250-NEXT: s_and_b64 s[12:13], s[4:5], 1
|
|
; GFX1250-NEXT: s_and_b64 s[14:15], s[4:5], 4
|
|
; GFX1250-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11]
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[16:17], s[4:5], 8
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[12:13], s[10:11]
|
|
; GFX1250-NEXT: s_and_b64 s[18:19], s[4:5], 16
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[16:17]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[20:21], s[4:5], 32
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[18:19]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[22:23], s[4:5], 64
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[24:25], s[4:5], 0x80
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[26:27], s[4:5], 0x100
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[24:25]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[28:29], s[4:5], 0x200
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[26:27]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[30:31], s[4:5], 0x400
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[34:35], s[4:5], 0x800
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[36:37], s[4:5], 0x1000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[34:35]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[38:39], s[4:5], 0x2000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[36:37]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[40:41], s[4:5], 0x4000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[42:43], s[4:5], 0x8000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[44:45], s[4:5], 0x10000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[42:43]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[46:47], s[4:5], 0x20000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[44:45]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[48:49], s[4:5], 0x40000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[50:51], s[4:5], 0x80000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[52:53], s[4:5], 0x100000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[50:51]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[54:55], s[4:5], 0x200000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[52:53]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[56:57], s[4:5], 0x400000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[58:59], s[4:5], 0x800000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[60:61], s[4:5], 0x1000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[58:59]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[62:63], s[4:5], 0x2000000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[60:61]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[64:65], s[4:5], 0x4000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[66:67], s[4:5], 0x8000000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[68:69], s[4:5], 0x10000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[66:67]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[70:71], s[4:5], 0x20000000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[68:69]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[72:73], s[4:5], 0x40000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[70:71]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[72:73]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX1250-NEXT: s_xor_b64 s[8:9], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5]
|
|
; GFX1250-NEXT: s_mov_b32 s4, s0
|
|
; GFX1250-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3]
|
|
; GFX1250-NEXT: s_mov_b32 s5, s1
|
|
; GFX1250-NEXT: s_lshr_b64 s[2:3], s[2:3], 31
|
|
; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, s2
|
|
; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: test_clmulr_i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @8
|
|
; EG-NEXT: ALU 98, @11, KC0[], KC1[]
|
|
; EG-NEXT: ALU 110, @110, KC0[], KC1[]
|
|
; EG-NEXT: ALU 12, @221, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 8:
|
|
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 10:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 11:
|
|
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, PV.W, literal.x,
|
|
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T0.Y, literal.y,
|
|
; EG-NEXT: 65280(9.147676e-41), 8(1.121039e-44)
|
|
; EG-NEXT: AND_INT T1.X, PS, literal.x,
|
|
; EG-NEXT: LSHR T1.Y, T0.Y, literal.y,
|
|
; EG-NEXT: LSHL T1.Z, PV.W, literal.z,
|
|
; EG-NEXT: LSHL T0.W, T0.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
|
; EG-NEXT: 65280(9.147676e-41), 24(3.363116e-44)
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T2.X, T0.X, literal.x,
|
|
; EG-NEXT: LSHL T0.Y, PS, literal.y,
|
|
; EG-NEXT: LSHL T2.Z, T0.X, literal.x,
|
|
; EG-NEXT: OR_INT T0.W, PV.W, PV.Z,
|
|
; EG-NEXT: OR_INT * T1.W, PV.X, PV.Y,
|
|
; EG-NEXT: 24(3.363116e-44), 8(1.121039e-44)
|
|
; EG-NEXT: OR_INT T1.Z, PV.W, PS,
|
|
; EG-NEXT: OR_INT T0.W, PV.Z, PV.Y,
|
|
; EG-NEXT: OR_INT * T1.W, T0.Z, PV.X,
|
|
; EG-NEXT: OR_INT T0.Z, PV.W, PS,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHL T1.Z, PV.W, literal.y,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T0.Z, PS, literal.x,
|
|
; EG-NEXT: LSHL T0.W, PV.W, literal.y,
|
|
; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T1.Z, PS, literal.x,
|
|
; EG-NEXT: LSHR T1.W, PS, literal.y,
|
|
; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: AND_INT T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHR T0.Z, PS, literal.y,
|
|
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.Z, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: OR_INT T1.Z, PV.W, PS,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.Y, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: OR_INT T0.Z, PV.W, PS,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHL T1.Z, PV.W, 1,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, PS, literal.x,
|
|
; EG-NEXT: LSHL T0.W, PV.W, 1,
|
|
; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.W, PS, 1,
|
|
; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
|
; EG-NEXT: AND_INT T3.W, T1.W, literal.x,
|
|
; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.W, T1.W, literal.x,
|
|
; EG-NEXT: MULLO_INT * T0.Y, T0.W, PV.W,
|
|
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, T0.X, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.W,
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 256(3.587324e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 512(7.174648e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: 1024(1.434930e-42), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 110:
|
|
; EG-NEXT: XOR_INT T3.W, T2.W, T0.X,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, T0.Z,
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, T1.Z,
|
|
; EG-NEXT: 2048(2.869859e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 4096(5.739719e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 8192(1.147944e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 16384(2.295887e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 32768(4.591775e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 65536(9.183550e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 131072(1.836710e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 262144(3.673420e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 524288(7.346840e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 1048576(1.469368e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 2097152(2.938736e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 4194304(5.877472e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 16777216(2.350989e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 33554432(9.403955e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 67108864(1.504633e-36), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 134217728(3.851860e-34), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 268435456(2.524355e-29), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 536870912(1.084202e-19), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 1073741824(2.000000e+00), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T1.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: -2147483648(-0.000000e+00), 0(0.000000e+00)
|
|
; EG-NEXT: XOR_INT T1.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: LSHR T0.Z, T3.W, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: AND_INT * T1.W, T3.W, literal.y,
|
|
; EG-NEXT: 8(1.121039e-44), 65280(9.147676e-41)
|
|
; EG-NEXT: LSHL T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHL T1.Z, T2.W, literal.y,
|
|
; EG-NEXT: LSHR T0.W, PV.W, literal.y,
|
|
; EG-NEXT: AND_INT * T1.W, PV.Z, literal.z,
|
|
; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44)
|
|
; EG-NEXT: 65280(9.147676e-41), 0(0.000000e+00)
|
|
; EG-NEXT: OR_INT T0.W, PS, PV.W,
|
|
; EG-NEXT: OR_INT * T1.W, PV.Z, PV.Y,
|
|
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, PV.W, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T0.W, PS, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.W, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, PV.W, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: ALU clause starting at 221:
|
|
; EG-NEXT: AND_INT T0.W, T0.W, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, T1.W, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, PV.W, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.W, PS, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.W, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: OR_INT T0.X, PV.W, PS,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
|
|
%a = load i32, ptr addrspace(1) %in
|
|
%b = load i32, ptr addrspace(1) %b_ptr
|
|
%a.ext = zext i32 %a to i64
|
|
%b.ext = zext i32 %b to i64
|
|
%clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext)
|
|
%res.ext = lshr i64 %clmul, 31
|
|
%res = trunc i64 %res.ext to i32
|
|
store i32 %res, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
define amdgpu_kernel void @test_clmulh_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
|
|
; SI-LABEL: test_clmulh_i32:
|
|
; SI: ; %bb.0:
|
|
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
|
|
; SI-NEXT: s_mov_b32 s3, 0xf000
|
|
; SI-NEXT: s_mov_b32 s2, -1
|
|
; SI-NEXT: s_mov_b32 s10, s2
|
|
; SI-NEXT: s_mov_b32 s11, s3
|
|
; SI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; SI-NEXT: s_mov_b32 s8, s6
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; SI-NEXT: s_mov_b32 s7, 0
|
|
; SI-NEXT: s_mov_b32 s21, s7
|
|
; SI-NEXT: s_mov_b32 s0, s4
|
|
; SI-NEXT: s_mov_b32 s1, s5
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: s_mov_b32 s15, s7
|
|
; SI-NEXT: s_mov_b32 s11, s7
|
|
; SI-NEXT: s_mov_b32 s13, s7
|
|
; SI-NEXT: s_mov_b32 s17, s7
|
|
; SI-NEXT: s_mov_b32 s19, s7
|
|
; SI-NEXT: s_mov_b32 s23, s7
|
|
; SI-NEXT: s_mov_b32 s25, s7
|
|
; SI-NEXT: s_mov_b32 s27, s7
|
|
; SI-NEXT: s_mov_b32 s29, s7
|
|
; SI-NEXT: s_mov_b32 s31, s7
|
|
; SI-NEXT: s_mov_b32 s35, s7
|
|
; SI-NEXT: s_mov_b32 s37, s7
|
|
; SI-NEXT: s_mov_b32 s39, s7
|
|
; SI-NEXT: s_mov_b32 s41, s7
|
|
; SI-NEXT: s_mov_b32 s43, s7
|
|
; SI-NEXT: s_waitcnt vmcnt(0)
|
|
; SI-NEXT: v_readfirstlane_b32 s33, v1
|
|
; SI-NEXT: s_and_b32 s20, s33, 2
|
|
; SI-NEXT: v_readfirstlane_b32 s6, v0
|
|
; SI-NEXT: s_bfe_i32 s8, s33, 0x10000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[20:21], s[20:21], 0
|
|
; SI-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
|
|
; SI-NEXT: s_and_b32 s8, s8, s6
|
|
; SI-NEXT: s_and_b64 s[20:21], s[20:21], exec
|
|
; SI-NEXT: s_cselect_b32 s21, 0, s5
|
|
; SI-NEXT: s_cselect_b32 s20, 0, s4
|
|
; SI-NEXT: s_and_b32 s14, s33, 4
|
|
; SI-NEXT: s_xor_b64 s[20:21], s[8:9], s[20:21]
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 2
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s15, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s14, 0, s14
|
|
; SI-NEXT: s_and_b32 s10, s33, 8
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0
|
|
; SI-NEXT: s_xor_b64 s[14:15], s[20:21], s[14:15]
|
|
; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 3
|
|
; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec
|
|
; SI-NEXT: s_cselect_b32 s11, 0, s21
|
|
; SI-NEXT: s_cselect_b32 s10, 0, s20
|
|
; SI-NEXT: s_and_b32 s12, s33, 16
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[12:13], s[12:13], 0
|
|
; SI-NEXT: s_lshl_b64 s[20:21], s[6:7], 4
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[14:15], s[10:11]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[12:13], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s21
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s20
|
|
; SI-NEXT: s_and_b32 s16, s33, 32
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[16:17], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 5
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s18, s33, 64
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[18:19], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 6
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s22, s33, 0x80
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[22:23], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 7
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s24, s33, 0x100
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[24:25], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 8
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s26, s33, 0x200
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[26:27], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 9
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s28, s33, 0x400
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[28:29], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 10
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s30, s33, 0x800
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[30:31], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 11
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s34, s33, 0x1000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[34:35], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 12
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s36, s33, 0x2000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[36:37], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 13
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s38, s33, 0x4000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[38:39], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 14
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s40, s33, 0x8000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[40:41], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 15
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s42, s33, 0x10000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[16:17], s[42:43], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 16
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[12:13], s[16:17], exec
|
|
; SI-NEXT: s_mov_b32 s5, s7
|
|
; SI-NEXT: s_cselect_b32 s13, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s12, 0, s14
|
|
; SI-NEXT: s_and_b32 s4, s33, 0x20000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0
|
|
; SI-NEXT: s_lshl_b64 s[14:15], s[6:7], 17
|
|
; SI-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; SI-NEXT: s_and_b64 s[4:5], s[4:5], exec
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: s_cselect_b32 s5, 0, s15
|
|
; SI-NEXT: s_cselect_b32 s4, 0, s14
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x40000
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[10:11], s[4:5]
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s13
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s12
|
|
; SI-NEXT: s_and_b32 s10, s33, 0x80000
|
|
; SI-NEXT: s_mov_b32 s11, s7
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[10:11], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 19
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x100000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x200000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x400000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x800000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x1000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x2000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x4000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x8000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x10000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 0x20000000
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_and_b32 s8, s33, 2.0
|
|
; SI-NEXT: s_mov_b32 s9, s7
|
|
; SI-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
|
|
; SI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30
|
|
; SI-NEXT: s_and_b64 s[8:9], s[8:9], exec
|
|
; SI-NEXT: s_cselect_b32 s9, 0, s11
|
|
; SI-NEXT: s_cselect_b32 s8, 0, s10
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; SI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31
|
|
; SI-NEXT: s_cmp_gt_i32 s33, -1
|
|
; SI-NEXT: s_cselect_b32 s7, 0, s7
|
|
; SI-NEXT: s_cselect_b32 s6, 0, s6
|
|
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
|
|
; SI-NEXT: v_mov_b32_e32 v0, s5
|
|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; SI-NEXT: s_endpgm
|
|
;
|
|
; VI-LABEL: test_clmulh_i32:
|
|
; VI: ; %bb.0:
|
|
; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
|
|
; VI-NEXT: s_mov_b32 s3, 0xf000
|
|
; VI-NEXT: s_mov_b32 s2, -1
|
|
; VI-NEXT: s_mov_b32 s10, s2
|
|
; VI-NEXT: s_mov_b32 s11, s3
|
|
; VI-NEXT: s_waitcnt lgkmcnt(0)
|
|
; VI-NEXT: s_mov_b32 s8, s6
|
|
; VI-NEXT: s_mov_b32 s9, s7
|
|
; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; VI-NEXT: s_mov_b32 s0, s4
|
|
; VI-NEXT: s_mov_b32 s7, 0
|
|
; VI-NEXT: s_mov_b32 s1, s5
|
|
; VI-NEXT: s_mov_b32 s11, s7
|
|
; VI-NEXT: s_mov_b32 s9, s7
|
|
; VI-NEXT: s_mov_b32 s13, s7
|
|
; VI-NEXT: s_mov_b32 s15, s7
|
|
; VI-NEXT: s_mov_b32 s17, s7
|
|
; VI-NEXT: s_mov_b32 s19, s7
|
|
; VI-NEXT: s_mov_b32 s21, s7
|
|
; VI-NEXT: s_mov_b32 s23, s7
|
|
; VI-NEXT: s_mov_b32 s25, s7
|
|
; VI-NEXT: s_mov_b32 s27, s7
|
|
; VI-NEXT: s_mov_b32 s29, s7
|
|
; VI-NEXT: s_mov_b32 s31, s7
|
|
; VI-NEXT: s_mov_b32 s35, s7
|
|
; VI-NEXT: s_mov_b32 s37, s7
|
|
; VI-NEXT: s_mov_b32 s39, s7
|
|
; VI-NEXT: s_mov_b32 s41, s7
|
|
; VI-NEXT: s_mov_b32 s43, s7
|
|
; VI-NEXT: s_mov_b32 s45, s7
|
|
; VI-NEXT: s_mov_b32 s47, s7
|
|
; VI-NEXT: s_waitcnt vmcnt(0)
|
|
; VI-NEXT: v_readfirstlane_b32 s4, v1
|
|
; VI-NEXT: v_readfirstlane_b32 s6, v0
|
|
; VI-NEXT: s_bfe_i32 s5, s4, 0x10000
|
|
; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 1
|
|
; VI-NEXT: s_and_b32 s10, s4, 2
|
|
; VI-NEXT: s_and_b32 s8, s5, s6
|
|
; VI-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s49
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s48
|
|
; VI-NEXT: s_lshl_b64 s[48:49], s[6:7], 2
|
|
; VI-NEXT: s_and_b32 s12, s4, 4
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s49
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s48
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 3
|
|
; VI-NEXT: s_and_b32 s14, s4, 8
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[14:15], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 4
|
|
; VI-NEXT: s_and_b32 s16, s4, 16
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[16:17], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 5
|
|
; VI-NEXT: s_and_b32 s18, s4, 32
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[18:19], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 6
|
|
; VI-NEXT: s_and_b32 s20, s4, 64
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[20:21], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 7
|
|
; VI-NEXT: s_and_b32 s22, s4, 0x80
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[22:23], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 8
|
|
; VI-NEXT: s_and_b32 s24, s4, 0x100
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[24:25], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 9
|
|
; VI-NEXT: s_and_b32 s26, s4, 0x200
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[26:27], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 10
|
|
; VI-NEXT: s_and_b32 s28, s4, 0x400
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[28:29], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 11
|
|
; VI-NEXT: s_and_b32 s30, s4, 0x800
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[30:31], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 12
|
|
; VI-NEXT: s_and_b32 s34, s4, 0x1000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[34:35], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 13
|
|
; VI-NEXT: s_and_b32 s36, s4, 0x2000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[36:37], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 14
|
|
; VI-NEXT: s_and_b32 s38, s4, 0x4000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[38:39], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 15
|
|
; VI-NEXT: s_and_b32 s40, s4, 0x8000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[40:41], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 16
|
|
; VI-NEXT: s_and_b32 s42, s4, 0x10000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[42:43], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 17
|
|
; VI-NEXT: s_and_b32 s44, s4, 0x20000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[44:45], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 18
|
|
; VI-NEXT: s_and_b32 s46, s4, 0x40000
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_cmp_eq_u64 s[46:47], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_lshl_b64 s[12:13], s[6:7], 19
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_and_b32 s10, s4, 0x80000
|
|
; VI-NEXT: s_mov_b32 s11, s7
|
|
; VI-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s13
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s12
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 20
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x100000
|
|
; VI-NEXT: s_mov_b32 s13, s7
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 21
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x200000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 22
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x400000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 23
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x800000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 24
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x1000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 25
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x2000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 26
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x4000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 27
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x8000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 28
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x10000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 29
|
|
; VI-NEXT: s_and_b32 s12, s4, 0x20000000
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[10:11], s[6:7], 30
|
|
; VI-NEXT: s_and_b32 s12, s4, 2.0
|
|
; VI-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; VI-NEXT: s_cselect_b32 s11, 0, s11
|
|
; VI-NEXT: s_cselect_b32 s10, 0, s10
|
|
; VI-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; VI-NEXT: s_lshl_b64 s[6:7], s[6:7], 31
|
|
; VI-NEXT: s_cmp_gt_i32 s4, -1
|
|
; VI-NEXT: s_cselect_b32 s5, 0, s7
|
|
; VI-NEXT: s_cselect_b32 s4, 0, s6
|
|
; VI-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
|
; VI-NEXT: v_mov_b32_e32 v0, s5
|
|
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; VI-NEXT: s_endpgm
|
|
;
|
|
; GFX9-LABEL: test_clmulh_i32:
|
|
; GFX9: ; %bb.0:
|
|
; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
|
|
; GFX9-NEXT: s_mov_b32 s3, 0xf000
|
|
; GFX9-NEXT: s_mov_b32 s2, -1
|
|
; GFX9-NEXT: s_mov_b32 s6, s2
|
|
; GFX9-NEXT: s_mov_b32 s7, s3
|
|
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX9-NEXT: s_mov_b32 s4, s10
|
|
; GFX9-NEXT: s_mov_b32 s5, s11
|
|
; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
|
|
; GFX9-NEXT: s_mov_b32 s0, s8
|
|
; GFX9-NEXT: s_mov_b32 s5, 0
|
|
; GFX9-NEXT: s_mov_b32 s11, s5
|
|
; GFX9-NEXT: s_mov_b32 s7, s5
|
|
; GFX9-NEXT: s_mov_b32 s13, s5
|
|
; GFX9-NEXT: s_mov_b32 s15, s5
|
|
; GFX9-NEXT: s_mov_b32 s17, s5
|
|
; GFX9-NEXT: s_mov_b32 s19, s5
|
|
; GFX9-NEXT: s_mov_b32 s21, s5
|
|
; GFX9-NEXT: s_mov_b32 s23, s5
|
|
; GFX9-NEXT: s_mov_b32 s25, s5
|
|
; GFX9-NEXT: s_mov_b32 s27, s5
|
|
; GFX9-NEXT: s_mov_b32 s29, s5
|
|
; GFX9-NEXT: s_mov_b32 s31, s5
|
|
; GFX9-NEXT: s_mov_b32 s35, s5
|
|
; GFX9-NEXT: s_mov_b32 s37, s5
|
|
; GFX9-NEXT: s_mov_b32 s39, s5
|
|
; GFX9-NEXT: s_mov_b32 s41, s5
|
|
; GFX9-NEXT: s_mov_b32 s43, s5
|
|
; GFX9-NEXT: s_mov_b32 s45, s5
|
|
; GFX9-NEXT: s_mov_b32 s47, s5
|
|
; GFX9-NEXT: s_mov_b32 s1, s9
|
|
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX9-NEXT: v_readfirstlane_b32 s8, v1
|
|
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
|
|
; GFX9-NEXT: s_bfe_i32 s6, s8, 0x10000
|
|
; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 1
|
|
; GFX9-NEXT: s_and_b32 s10, s8, 2
|
|
; GFX9-NEXT: s_and_b32 s6, s6, s4
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s49
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s48
|
|
; GFX9-NEXT: s_lshl_b64 s[48:49], s[4:5], 2
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 4
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s49
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s48
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 3
|
|
; GFX9-NEXT: s_and_b32 s14, s8, 8
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[14:15], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 4
|
|
; GFX9-NEXT: s_and_b32 s16, s8, 16
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[16:17], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 5
|
|
; GFX9-NEXT: s_and_b32 s18, s8, 32
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[18:19], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 6
|
|
; GFX9-NEXT: s_and_b32 s20, s8, 64
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[20:21], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 7
|
|
; GFX9-NEXT: s_and_b32 s22, s8, 0x80
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[22:23], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 8
|
|
; GFX9-NEXT: s_and_b32 s24, s8, 0x100
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[24:25], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 9
|
|
; GFX9-NEXT: s_and_b32 s26, s8, 0x200
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[26:27], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 10
|
|
; GFX9-NEXT: s_and_b32 s28, s8, 0x400
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[28:29], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 11
|
|
; GFX9-NEXT: s_and_b32 s30, s8, 0x800
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[30:31], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 12
|
|
; GFX9-NEXT: s_and_b32 s34, s8, 0x1000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[34:35], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 13
|
|
; GFX9-NEXT: s_and_b32 s36, s8, 0x2000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[36:37], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 14
|
|
; GFX9-NEXT: s_and_b32 s38, s8, 0x4000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[38:39], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 15
|
|
; GFX9-NEXT: s_and_b32 s40, s8, 0x8000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[40:41], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 16
|
|
; GFX9-NEXT: s_and_b32 s42, s8, 0x10000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[42:43], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 17
|
|
; GFX9-NEXT: s_and_b32 s44, s8, 0x20000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[44:45], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 18
|
|
; GFX9-NEXT: s_and_b32 s46, s8, 0x40000
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[46:47], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_lshl_b64 s[12:13], s[4:5], 19
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_and_b32 s10, s8, 0x80000
|
|
; GFX9-NEXT: s_mov_b32 s11, s5
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s13
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s12
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 20
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x100000
|
|
; GFX9-NEXT: s_mov_b32 s13, s5
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 21
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x200000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 22
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x400000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 23
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x800000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 24
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x1000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 25
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x2000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 26
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x4000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 27
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x8000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 28
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x10000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 29
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 0x20000000
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], 30
|
|
; GFX9-NEXT: s_and_b32 s12, s8, 2.0
|
|
; GFX9-NEXT: s_cmp_eq_u64 s[12:13], 0
|
|
; GFX9-NEXT: s_cselect_b32 s11, 0, s11
|
|
; GFX9-NEXT: s_cselect_b32 s10, 0, s10
|
|
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[10:11]
|
|
; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], 31
|
|
; GFX9-NEXT: s_cmp_gt_i32 s8, -1
|
|
; GFX9-NEXT: s_cselect_b32 s5, 0, s5
|
|
; GFX9-NEXT: s_cselect_b32 s4, 0, s4
|
|
; GFX9-NEXT: s_xor_b64 s[4:5], s[6:7], s[4:5]
|
|
; GFX9-NEXT: v_mov_b32_e32 v0, s5
|
|
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
|
; GFX9-NEXT: s_endpgm
|
|
;
|
|
; GFX10-LABEL: test_clmulh_i32:
|
|
; GFX10: ; %bb.0:
|
|
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
|
|
; GFX10-NEXT: s_mov_b32 s6, -1
|
|
; GFX10-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX10-NEXT: s_mov_b32 s10, s6
|
|
; GFX10-NEXT: s_mov_b32 s11, s7
|
|
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX10-NEXT: s_mov_b32 s8, s2
|
|
; GFX10-NEXT: s_mov_b32 s9, s3
|
|
; GFX10-NEXT: s_mov_b32 s3, 0
|
|
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
|
|
; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0)
|
|
; GFX10-NEXT: s_mov_b32 s11, s3
|
|
; GFX10-NEXT: s_mov_b32 s9, s3
|
|
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX10-NEXT: v_readfirstlane_b32 s4, v1
|
|
; GFX10-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX10-NEXT: s_bfe_i32 s5, s4, 0x10000
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 2
|
|
; GFX10-NEXT: s_lshl_b64 s[12:13], s[2:3], 1
|
|
; GFX10-NEXT: s_and_b32 s8, s5, s2
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_mov_b32 s5, s1
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s13
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s12
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 4
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 2
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 8
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 3
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 16
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 4
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 32
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 5
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 64
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 6
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x80
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 7
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x100
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 8
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x200
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 9
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x400
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 10
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x800
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 11
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x1000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 12
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x2000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 13
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x4000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 14
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x8000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 15
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x10000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 16
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x20000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 17
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x40000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 18
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x80000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 19
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x100000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 20
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x200000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 21
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x400000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 22
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x800000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 23
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x1000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 24
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x2000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 25
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x4000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 26
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x8000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 27
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x10000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 28
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 0x20000000
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 29
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX10-NEXT: s_and_b32 s10, s4, 2.0
|
|
; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], 30
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX10-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX10-NEXT: s_cselect_b32 s11, 0, s15
|
|
; GFX10-NEXT: s_cselect_b32 s10, 0, s14
|
|
; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 31
|
|
; GFX10-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; GFX10-NEXT: s_cmp_gt_i32 s4, -1
|
|
; GFX10-NEXT: s_mov_b32 s4, s0
|
|
; GFX10-NEXT: s_cselect_b32 s3, 0, s3
|
|
; GFX10-NEXT: s_cselect_b32 s2, 0, s2
|
|
; GFX10-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3]
|
|
; GFX10-NEXT: v_mov_b32_e32 v0, s3
|
|
; GFX10-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
|
; GFX10-NEXT: s_endpgm
|
|
;
|
|
; GFX11-LABEL: test_clmulh_i32:
|
|
; GFX11: ; %bb.0:
|
|
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX11-NEXT: s_mov_b32 s6, -1
|
|
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX11-NEXT: s_mov_b32 s10, s6
|
|
; GFX11-NEXT: s_mov_b32 s11, s7
|
|
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
|
; GFX11-NEXT: s_mov_b32 s8, s2
|
|
; GFX11-NEXT: s_mov_b32 s9, s3
|
|
; GFX11-NEXT: s_mov_b32 s3, 0
|
|
; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
|
|
; GFX11-NEXT: s_mov_b32 s11, s3
|
|
; GFX11-NEXT: s_mov_b32 s9, s3
|
|
; GFX11-NEXT: s_waitcnt vmcnt(0)
|
|
; GFX11-NEXT: v_readfirstlane_b32 s4, v1
|
|
; GFX11-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX11-NEXT: s_bfe_i32 s5, s4, 0x10000
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 2
|
|
; GFX11-NEXT: s_lshl_b64 s[12:13], s[2:3], 1
|
|
; GFX11-NEXT: s_and_b32 s8, s5, s2
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_mov_b32 s5, s1
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s13
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s12
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 4
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 2
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 8
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 3
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 16
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 4
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 32
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 5
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 64
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 6
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x80
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 7
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x100
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 8
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x200
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 9
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x400
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 10
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x800
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 11
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x1000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 12
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x2000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 13
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x4000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 14
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x8000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 15
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x10000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 16
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x20000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 17
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x40000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 18
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x80000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 19
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x100000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 20
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x200000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 21
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x400000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 22
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x800000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 23
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x1000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 24
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x2000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 25
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x4000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 26
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x8000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 27
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x10000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 28
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 0x20000000
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 29
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s13, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s12, 0, s14
|
|
; GFX11-NEXT: s_and_b32 s10, s4, 2.0
|
|
; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], 30
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[12:13]
|
|
; GFX11-NEXT: s_cmp_eq_u64 s[10:11], 0
|
|
; GFX11-NEXT: s_cselect_b32 s11, 0, s15
|
|
; GFX11-NEXT: s_cselect_b32 s10, 0, s14
|
|
; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 31
|
|
; GFX11-NEXT: s_xor_b64 s[8:9], s[8:9], s[10:11]
|
|
; GFX11-NEXT: s_cmp_gt_i32 s4, -1
|
|
; GFX11-NEXT: s_mov_b32 s4, s0
|
|
; GFX11-NEXT: s_cselect_b32 s3, 0, s3
|
|
; GFX11-NEXT: s_cselect_b32 s2, 0, s2
|
|
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
|
; GFX11-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3]
|
|
; GFX11-NEXT: v_mov_b32_e32 v0, s3
|
|
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
|
|
; GFX11-NEXT: s_endpgm
|
|
;
|
|
; GFX12-LABEL: test_clmulh_i32:
|
|
; GFX12: ; %bb.0:
|
|
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
|
|
; GFX12-NEXT: s_mov_b32 s6, -1
|
|
; GFX12-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX12-NEXT: s_mov_b32 s10, s6
|
|
; GFX12-NEXT: s_mov_b32 s11, s7
|
|
; GFX12-NEXT: s_wait_kmcnt 0x0
|
|
; GFX12-NEXT: s_mov_b32 s8, s2
|
|
; GFX12-NEXT: s_mov_b32 s9, s3
|
|
; GFX12-NEXT: s_mov_b32 s3, 0
|
|
; GFX12-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
|
|
; GFX12-NEXT: s_mov_b32 s5, s3
|
|
; GFX12-NEXT: s_mov_b32 s9, s3
|
|
; GFX12-NEXT: s_mov_b32 s11, s3
|
|
; GFX12-NEXT: s_mov_b32 s13, s3
|
|
; GFX12-NEXT: s_mov_b32 s15, s3
|
|
; GFX12-NEXT: s_mov_b32 s17, s3
|
|
; GFX12-NEXT: s_mov_b32 s19, s3
|
|
; GFX12-NEXT: s_mov_b32 s21, s3
|
|
; GFX12-NEXT: s_mov_b32 s23, s3
|
|
; GFX12-NEXT: s_mov_b32 s25, s3
|
|
; GFX12-NEXT: s_mov_b32 s27, s3
|
|
; GFX12-NEXT: s_mov_b32 s29, s3
|
|
; GFX12-NEXT: s_mov_b32 s31, s3
|
|
; GFX12-NEXT: s_mov_b32 s35, s3
|
|
; GFX12-NEXT: s_mov_b32 s37, s3
|
|
; GFX12-NEXT: s_mov_b32 s39, s3
|
|
; GFX12-NEXT: s_mov_b32 s41, s3
|
|
; GFX12-NEXT: s_mov_b32 s43, s3
|
|
; GFX12-NEXT: s_mov_b32 s45, s3
|
|
; GFX12-NEXT: s_mov_b32 s47, s3
|
|
; GFX12-NEXT: s_mov_b32 s49, s3
|
|
; GFX12-NEXT: s_mov_b32 s51, s3
|
|
; GFX12-NEXT: s_mov_b32 s53, s3
|
|
; GFX12-NEXT: s_mov_b32 s55, s3
|
|
; GFX12-NEXT: s_mov_b32 s57, s3
|
|
; GFX12-NEXT: s_mov_b32 s59, s3
|
|
; GFX12-NEXT: s_mov_b32 s61, s3
|
|
; GFX12-NEXT: s_mov_b32 s63, s3
|
|
; GFX12-NEXT: s_mov_b32 s65, s3
|
|
; GFX12-NEXT: s_mov_b32 s67, s3
|
|
; GFX12-NEXT: s_mov_b32 s69, s3
|
|
; GFX12-NEXT: s_wait_loadcnt 0x0
|
|
; GFX12-NEXT: v_readfirstlane_b32 s33, v1
|
|
; GFX12-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX12-NEXT: s_and_b32 s4, s33, 2
|
|
; GFX12-NEXT: s_and_b32 s8, s33, 1
|
|
; GFX12-NEXT: s_and_b32 s10, s33, 4
|
|
; GFX12-NEXT: s_mul_u64 s[4:5], s[2:3], s[4:5]
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s12, s33, 8
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5]
|
|
; GFX12-NEXT: s_and_b32 s14, s33, 16
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s16, s33, 32
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s18, s33, 64
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[16:17]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s20, s33, 0x80
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[18:19]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s22, s33, 0x100
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s24, s33, 0x200
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s26, s33, 0x400
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[24:25]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s28, s33, 0x800
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[26:27]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s30, s33, 0x1000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s34, s33, 0x2000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s36, s33, 0x4000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[34:35]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s38, s33, 0x8000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[36:37]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s40, s33, 0x10000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s42, s33, 0x20000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s44, s33, 0x40000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[42:43]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s46, s33, 0x80000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[44:45]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s48, s33, 0x100000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s50, s33, 0x200000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s52, s33, 0x400000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[50:51]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s54, s33, 0x800000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[52:53]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s56, s33, 0x1000000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s58, s33, 0x2000000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s60, s33, 0x4000000
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[58:59]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_and_b32 s62, s33, 0x8000000
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[60:61]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_and_b32 s64, s33, 0x10000000
|
|
; GFX12-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_and_b32 s66, s33, 0x20000000
|
|
; GFX12-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[12:13]
|
|
; GFX12-NEXT: s_and_b32 s68, s33, 2.0
|
|
; GFX12-NEXT: s_mul_u64 s[8:9], s[2:3], s[66:67]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[14:15]
|
|
; GFX12-NEXT: s_mul_u64 s[10:11], s[2:3], s[68:69]
|
|
; GFX12-NEXT: s_and_b32 s12, s33, 0x80000000
|
|
; GFX12-NEXT: s_mov_b32 s13, s3
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], s[12:13]
|
|
; GFX12-NEXT: s_xor_b64 s[4:5], s[4:5], s[10:11]
|
|
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
|
; GFX12-NEXT: s_xor_b64 s[2:3], s[4:5], s[2:3]
|
|
; GFX12-NEXT: s_mov_b32 s4, s0
|
|
; GFX12-NEXT: v_mov_b32_e32 v0, s3
|
|
; GFX12-NEXT: s_mov_b32 s5, s1
|
|
; GFX12-NEXT: buffer_store_b32 v0, off, s[4:7], null
|
|
; GFX12-NEXT: s_endpgm
|
|
;
|
|
; GFX1250-LABEL: test_clmulh_i32:
|
|
; GFX1250: ; %bb.0:
|
|
; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
|
|
; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv
|
|
; GFX1250-NEXT: s_mov_b32 s6, -1
|
|
; GFX1250-NEXT: s_mov_b32 s7, 0x31016000
|
|
; GFX1250-NEXT: s_mov_b32 s10, s6
|
|
; GFX1250-NEXT: s_mov_b32 s11, s7
|
|
; GFX1250-NEXT: s_wait_kmcnt 0x0
|
|
; GFX1250-NEXT: s_mov_b32 s8, s2
|
|
; GFX1250-NEXT: s_mov_b32 s9, s3
|
|
; GFX1250-NEXT: s_mov_b32 s3, 0
|
|
; GFX1250-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
|
|
; GFX1250-NEXT: s_mov_b32 s5, s3
|
|
; GFX1250-NEXT: s_wait_xcnt 0x0
|
|
; GFX1250-NEXT: s_mov_b64 s[8:9], 0x80000000
|
|
; GFX1250-NEXT: s_wait_loadcnt 0x0
|
|
; GFX1250-NEXT: v_readfirstlane_b32 s4, v1
|
|
; GFX1250-NEXT: v_readfirstlane_b32 s2, v0
|
|
; GFX1250-NEXT: s_and_b64 s[10:11], s[4:5], 2
|
|
; GFX1250-NEXT: s_and_b64 s[12:13], s[4:5], 1
|
|
; GFX1250-NEXT: s_and_b64 s[14:15], s[4:5], 4
|
|
; GFX1250-NEXT: s_mul_u64 s[10:11], s[2:3], s[10:11]
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[16:17], s[4:5], 8
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[14:15]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[12:13], s[10:11]
|
|
; GFX1250-NEXT: s_and_b64 s[18:19], s[4:5], 16
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[16:17]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[20:21], s[4:5], 32
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[18:19]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[22:23], s[4:5], 64
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[20:21]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[24:25], s[4:5], 0x80
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[22:23]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[26:27], s[4:5], 0x100
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[24:25]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[28:29], s[4:5], 0x200
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[26:27]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[30:31], s[4:5], 0x400
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[28:29]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[34:35], s[4:5], 0x800
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[30:31]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[36:37], s[4:5], 0x1000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[34:35]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[38:39], s[4:5], 0x2000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[36:37]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[40:41], s[4:5], 0x4000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[38:39]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[42:43], s[4:5], 0x8000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[40:41]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[44:45], s[4:5], 0x10000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[42:43]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[46:47], s[4:5], 0x20000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[44:45]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[48:49], s[4:5], 0x40000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[46:47]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[50:51], s[4:5], 0x80000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[48:49]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[52:53], s[4:5], 0x100000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[50:51]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[54:55], s[4:5], 0x200000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[52:53]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[56:57], s[4:5], 0x400000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[54:55]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[58:59], s[4:5], 0x800000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[56:57]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[60:61], s[4:5], 0x1000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[58:59]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[62:63], s[4:5], 0x2000000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[60:61]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[64:65], s[4:5], 0x4000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[62:63]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[66:67], s[4:5], 0x8000000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[64:65]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[68:69], s[4:5], 0x10000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[66:67]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_and_b64 s[70:71], s[4:5], 0x20000000
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[68:69]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[72:73], s[4:5], 0x40000000
|
|
; GFX1250-NEXT: s_mul_u64 s[12:13], s[2:3], s[70:71]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_mul_u64 s[14:15], s[2:3], s[72:73]
|
|
; GFX1250-NEXT: s_xor_b64 s[10:11], s[10:11], s[12:13]
|
|
; GFX1250-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
|
|
; GFX1250-NEXT: s_xor_b64 s[8:9], s[10:11], s[14:15]
|
|
; GFX1250-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5]
|
|
; GFX1250-NEXT: s_mov_b32 s4, s0
|
|
; GFX1250-NEXT: s_xor_b64 s[2:3], s[8:9], s[2:3]
|
|
; GFX1250-NEXT: s_mov_b32 s5, s1
|
|
; GFX1250-NEXT: v_mov_b32_e32 v0, s3
|
|
; GFX1250-NEXT: buffer_store_b32 v0, off, s[4:7], null
|
|
; GFX1250-NEXT: s_endpgm
|
|
;
|
|
; EG-LABEL: test_clmulh_i32:
|
|
; EG: ; %bb.0:
|
|
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: TEX 0 @8
|
|
; EG-NEXT: ALU 98, @11, KC0[], KC1[]
|
|
; EG-NEXT: ALU 110, @110, KC0[], KC1[]
|
|
; EG-NEXT: ALU 13, @221, KC0[CB0:0-32], KC1[]
|
|
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
|
|
; EG-NEXT: CF_END
|
|
; EG-NEXT: PAD
|
|
; EG-NEXT: Fetch clause starting at 8:
|
|
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
|
|
; EG-NEXT: ALU clause starting at 10:
|
|
; EG-NEXT: MOV * T0.X, KC0[2].Z,
|
|
; EG-NEXT: ALU clause starting at 11:
|
|
; EG-NEXT: LSHR * T0.W, T0.X, literal.x,
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, PV.W, literal.x,
|
|
; EG-NEXT: AND_INT T0.W, T0.Y, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, T0.Y, literal.y,
|
|
; EG-NEXT: 65280(9.147676e-41), 8(1.121039e-44)
|
|
; EG-NEXT: AND_INT T1.X, PS, literal.x,
|
|
; EG-NEXT: LSHR T1.Y, T0.Y, literal.y,
|
|
; EG-NEXT: LSHL T1.Z, PV.W, literal.z,
|
|
; EG-NEXT: LSHL T0.W, T0.Y, literal.y,
|
|
; EG-NEXT: AND_INT * T1.W, T0.X, literal.x,
|
|
; EG-NEXT: 65280(9.147676e-41), 24(3.363116e-44)
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: LSHR T2.X, T0.X, literal.x,
|
|
; EG-NEXT: LSHL T0.Y, PS, literal.y,
|
|
; EG-NEXT: LSHL T2.Z, T0.X, literal.x,
|
|
; EG-NEXT: OR_INT T0.W, PV.W, PV.Z,
|
|
; EG-NEXT: OR_INT * T1.W, PV.X, PV.Y,
|
|
; EG-NEXT: 24(3.363116e-44), 8(1.121039e-44)
|
|
; EG-NEXT: OR_INT T1.Z, PV.W, PS,
|
|
; EG-NEXT: OR_INT T0.W, PV.Z, PV.Y,
|
|
; EG-NEXT: OR_INT * T1.W, T0.Z, PV.X,
|
|
; EG-NEXT: OR_INT T0.Z, PV.W, PS,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHL T1.Z, PV.W, literal.y,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T0.Z, PS, literal.x,
|
|
; EG-NEXT: LSHL T0.W, PV.W, literal.y,
|
|
; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T1.Z, PS, literal.x,
|
|
; EG-NEXT: LSHR T1.W, PS, literal.y,
|
|
; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: AND_INT T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHR T0.Z, PS, literal.y,
|
|
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.Z, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: OR_INT T1.Z, PV.W, PS,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.Y, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: OR_INT T0.Z, PV.W, PS,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHL T1.Z, PV.W, 1,
|
|
; EG-NEXT: AND_INT T0.W, PV.Z, literal.x,
|
|
; EG-NEXT: LSHR * T1.W, PV.Z, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, PS, literal.x,
|
|
; EG-NEXT: LSHL T0.W, PV.W, 1,
|
|
; EG-NEXT: OR_INT * T1.W, PV.Y, PV.Z,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.W, PS, 1,
|
|
; EG-NEXT: OR_INT * T0.W, PV.Z, PV.W,
|
|
; EG-NEXT: AND_INT T3.W, T1.W, literal.x,
|
|
; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T2.W, T1.W, literal.x,
|
|
; EG-NEXT: MULLO_INT * T0.Y, T0.W, PV.W,
|
|
; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, T0.X, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.W,
|
|
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 128(1.793662e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 256(3.587324e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T2.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 512(7.174648e-43), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT * T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: 1024(1.434930e-42), 0(0.000000e+00)
|
|
; EG-NEXT: ALU clause starting at 110:
|
|
; EG-NEXT: XOR_INT T3.W, T2.W, T0.X,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, T0.Z,
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, T1.Z,
|
|
; EG-NEXT: 2048(2.869859e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 4096(5.739719e-42), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 8192(1.147944e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 16384(2.295887e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 32768(4.591775e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 65536(9.183550e-41), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 131072(1.836710e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 262144(3.673420e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 524288(7.346840e-40), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 1048576(1.469368e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 2097152(2.938736e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 4194304(5.877472e-39), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 16777216(2.350989e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T3.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 33554432(9.403955e-38), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 67108864(1.504633e-36), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 134217728(3.851860e-34), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 268435456(2.524355e-29), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 536870912(1.084202e-19), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T1.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T4.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: 1073741824(2.000000e+00), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.Z, T1.W, literal.x,
|
|
; EG-NEXT: XOR_INT T1.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: -2147483648(-0.000000e+00), 0(0.000000e+00)
|
|
; EG-NEXT: XOR_INT T1.W, PV.W, PS,
|
|
; EG-NEXT: MULLO_INT * T0.X, T0.W, PV.Z,
|
|
; EG-NEXT: LSHR T0.Z, T3.W, literal.x,
|
|
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
|
|
; EG-NEXT: AND_INT * T1.W, T3.W, literal.y,
|
|
; EG-NEXT: 8(1.121039e-44), 65280(9.147676e-41)
|
|
; EG-NEXT: LSHL T0.Y, PS, literal.x,
|
|
; EG-NEXT: LSHL T1.Z, T2.W, literal.y,
|
|
; EG-NEXT: LSHR T0.W, PV.W, literal.y,
|
|
; EG-NEXT: AND_INT * T1.W, PV.Z, literal.z,
|
|
; EG-NEXT: 8(1.121039e-44), 24(3.363116e-44)
|
|
; EG-NEXT: 65280(9.147676e-41), 0(0.000000e+00)
|
|
; EG-NEXT: OR_INT T0.W, PS, PV.W,
|
|
; EG-NEXT: OR_INT * T1.W, PV.Z, PV.Y,
|
|
; EG-NEXT: OR_INT * T0.W, PS, PV.W,
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, PV.W, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: AND_INT T0.W, PS, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.W, literal.y,
|
|
; EG-NEXT: 252645135(7.053345e-30), 4(5.605194e-45)
|
|
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, PV.W, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: ALU clause starting at 221:
|
|
; EG-NEXT: AND_INT T0.W, T0.W, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, T1.W, literal.y,
|
|
; EG-NEXT: 858993459(4.172325e-08), 2(2.802597e-45)
|
|
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
|
; EG-NEXT: AND_INT T1.W, PV.W, literal.x,
|
|
; EG-NEXT: LSHR * T0.W, PV.W, 1,
|
|
; EG-NEXT: 1431655765(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: AND_INT T0.W, PS, literal.x,
|
|
; EG-NEXT: LSHL * T1.W, PV.W, 1,
|
|
; EG-NEXT: 1431655764(1.466015e+13), 0(0.000000e+00)
|
|
; EG-NEXT: OR_INT * T0.W, PV.W, PS,
|
|
; EG-NEXT: LSHR T0.X, PV.W, 1,
|
|
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
|
|
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
|
|
%b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
|
|
%a = load i32, ptr addrspace(1) %in
|
|
%b = load i32, ptr addrspace(1) %b_ptr
|
|
%a.ext = zext i32 %a to i64
|
|
%b.ext = zext i32 %b to i64
|
|
%clmul = call i64 @llvm.clmul.i64(i64 %a.ext, i64 %b.ext)
|
|
%res.ext = lshr i64 %clmul, 32
|
|
%res = trunc i64 %res.ext to i32
|
|
store i32 %res, ptr addrspace(1) %out
|
|
ret void
|
|
}
|
|
|
|
; Module-level declarations and attribute groups.
;
; Declaration of the AMDGPU work-item-id (x) intrinsic; it returns i32 and is
; tagged with attribute group #1 below.
; NOTE(review): no use of this intrinsic is visible in the last test of the
; file; confirm it is still referenced by an earlier test before removing it.
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
|
|
|
; Attribute group #0: nounwind only.
attributes #0 = { nounwind }
|
|
; Attribute group #1: nounwind and no memory access. `memory(none)` is the
; current spelling of the legacy function attribute `readnone`.
attributes #1 = { nounwind memory(none) }
|