[LegalizeTypes][DAG] Use SHL(X,1) instead of ADD(X,X) for variable vector indices for extraction/insertion legalization (#188277)
Avoid ADD(X,X), as it doesn't correctly handle undef elements; using SHL(X,1) instead also helps avoid some FREEZE() fold headaches. Resurrects #86857.
This commit is contained in:
@@ -231,8 +231,8 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
|
||||
|
||||
// Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
|
||||
SDValue Idx = N->getOperand(1);
|
||||
|
||||
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
|
||||
Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx,
|
||||
DAG.getShiftAmountConstant(1, Idx.getValueType(), dl));
|
||||
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
|
||||
|
||||
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
|
||||
@@ -446,10 +446,10 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
|
||||
std::swap(Lo, Hi);
|
||||
|
||||
SDValue Idx = N->getOperand(2);
|
||||
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
|
||||
Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx,
|
||||
DAG.getShiftAmountConstant(1, Idx.getValueType(), dl));
|
||||
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
|
||||
Idx = DAG.getNode(ISD::ADD, dl,
|
||||
Idx.getValueType(), Idx,
|
||||
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
|
||||
DAG.getConstant(1, dl, Idx.getValueType()));
|
||||
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
|
||||
|
||||
|
||||
@@ -2856,23 +2856,23 @@ define i128 @extract_v2i128_c(<2 x i128> %a, i32 %c) {
|
||||
; CHECK-SD-NEXT: sub sp, sp, #64
|
||||
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
|
||||
; CHECK-SD-NEXT: adds x9, x0, x0
|
||||
; CHECK-SD-NEXT: mov w8, w4
|
||||
; CHECK-SD-NEXT: mov w8, #1 // =0x1
|
||||
; CHECK-SD-NEXT: // kill: def $w4 killed $w4 def $x4
|
||||
; CHECK-SD-NEXT: adc x10, x1, x1
|
||||
; CHECK-SD-NEXT: adds x11, x2, x2
|
||||
; CHECK-SD-NEXT: fmov d1, x9
|
||||
; CHECK-SD-NEXT: fmov d0, x11
|
||||
; CHECK-SD-NEXT: adc x12, x3, x3
|
||||
; CHECK-SD-NEXT: add x8, x8, x8
|
||||
; CHECK-SD-NEXT: and x9, x8, #0x2
|
||||
; CHECK-SD-NEXT: orr w8, w8, #0x1
|
||||
; CHECK-SD-NEXT: mov x11, sp
|
||||
; CHECK-SD-NEXT: adc x11, x3, x3
|
||||
; CHECK-SD-NEXT: orr w8, w8, w4, lsl #1
|
||||
; CHECK-SD-NEXT: ubfiz x9, x4, #4, #1
|
||||
; CHECK-SD-NEXT: mov v1.d[1], x10
|
||||
; CHECK-SD-NEXT: add x10, sp, #32
|
||||
; CHECK-SD-NEXT: and x8, x8, #0x3
|
||||
; CHECK-SD-NEXT: mov v0.d[1], x12
|
||||
; CHECK-SD-NEXT: mov v0.d[1], x11
|
||||
; CHECK-SD-NEXT: mov x11, sp
|
||||
; CHECK-SD-NEXT: stp q1, q0, [sp]
|
||||
; CHECK-SD-NEXT: stp q1, q0, [sp, #32]
|
||||
; CHECK-SD-NEXT: ldr x0, [x10, x9, lsl #3]
|
||||
; CHECK-SD-NEXT: ldr x0, [x10, x9]
|
||||
; CHECK-SD-NEXT: ldr x1, [x11, x8, lsl #3]
|
||||
; CHECK-SD-NEXT: add sp, sp, #64
|
||||
; CHECK-SD-NEXT: ret
|
||||
|
||||
@@ -6,18 +6,15 @@
|
||||
define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, i32 %idx) {
|
||||
; GFX9-LABEL: test_bitcast_llc_v128i8_v16i8:
|
||||
; GFX9: ; %bb.0: ; %entry
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 8
|
||||
; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0
|
||||
; GFX9-NEXT: s_load_dword s33, s[4:5], 0x8
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 8
|
||||
; GFX9-NEXT: s_and_b32 s1, s0, 0xff
|
||||
; GFX9-NEXT: s_or_b32 s0, s1, s0
|
||||
; GFX9-NEXT: s_and_b32 s1, s0, 0xffff
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX9-NEXT: s_or_b32 s0, s1, s0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_add_i32 s33, s33, s33
|
||||
; GFX9-NEXT: s_mov_b32 s1, s0
|
||||
; GFX9-NEXT: s_lshl_b32 s33, s33, 1
|
||||
; GFX9-NEXT: s_mov_b32 s2, s0
|
||||
; GFX9-NEXT: s_mov_b32 s3, s0
|
||||
; GFX9-NEXT: s_mov_b32 s4, s0
|
||||
@@ -48,7 +45,8 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out,
|
||||
; GFX9-NEXT: s_mov_b32 s29, s0
|
||||
; GFX9-NEXT: s_mov_b32 s30, s0
|
||||
; GFX9-NEXT: s_mov_b32 s31, s0
|
||||
; GFX9-NEXT: s_add_i32 s36, s33, 3
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_lshl_b32 s33, s33, 2
|
||||
; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
|
||||
; GFX9-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
|
||||
; GFX9-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
|
||||
@@ -65,12 +63,9 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out,
|
||||
; GFX9-NEXT: v_mov_b64_e32 v[26:27], s[26:27]
|
||||
; GFX9-NEXT: v_mov_b64_e32 v[28:29], s[28:29]
|
||||
; GFX9-NEXT: v_mov_b64_e32 v[30:31], s[30:31]
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s36, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v35, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s0, s33, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v34, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s33, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v35, v3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v34, v2
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: v_mov_b32_e32 v36, 0
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s33, gpr_idx(SRC0)
|
||||
@@ -125,10 +120,11 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out,
|
||||
; GFX11-NEXT: s_mov_b32 s29, s0
|
||||
; GFX11-NEXT: s_mov_b32 s30, s0
|
||||
; GFX11-NEXT: s_mov_b32 s31, s0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_add_i32 s33, s33, s33
|
||||
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_lshl_b32 m0, s33, 2
|
||||
; GFX11-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
|
||||
; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
|
||||
; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
|
||||
; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
|
||||
@@ -142,14 +138,8 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out,
|
||||
; GFX11-NEXT: v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25
|
||||
; GFX11-NEXT: v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27
|
||||
; GFX11-NEXT: v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29
|
||||
; GFX11-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
|
||||
; GFX11-NEXT: s_lshl_b32 s0, s33, 1
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_add_i32 m0, s0, 3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v34, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s0, 2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v33, v0
|
||||
; GFX11-NEXT: s_mov_b32 m0, s0
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v34, v3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v33, v2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v32, v1
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v31, v0
|
||||
; GFX11-NEXT: global_store_b128 v35, v[31:34], s[34:35]
|
||||
@@ -198,10 +188,11 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out,
|
||||
; GFX12-NEXT: s_mov_b32 s29, s0
|
||||
; GFX12-NEXT: s_mov_b32 s30, s0
|
||||
; GFX12-NEXT: s_mov_b32 s31, s0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_add_co_i32 s33, s38, s38
|
||||
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
|
||||
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_lshl_b32 m0, s38, 2
|
||||
; GFX12-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
|
||||
; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
|
||||
; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
|
||||
; GFX12-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
|
||||
@@ -215,14 +206,8 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out,
|
||||
; GFX12-NEXT: v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25
|
||||
; GFX12-NEXT: v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27
|
||||
; GFX12-NEXT: v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29
|
||||
; GFX12-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31
|
||||
; GFX12-NEXT: s_lshl_b32 s0, s33, 1
|
||||
; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0)
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s0, 3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v34, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s0, 2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v33, v0
|
||||
; GFX12-NEXT: s_mov_b32 m0, s0
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v34, v3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v33, v2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v32, v1
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v31, v0
|
||||
; GFX12-NEXT: global_store_b128 v35, v[31:34], s[36:37]
|
||||
@@ -243,20 +228,13 @@ define amdgpu_kernel void @test_bitcast_llc_v64i16_v8i16(ptr addrspace(1) %out,
|
||||
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s2, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v1
|
||||
; GFX9-NEXT: s_add_i32 s3, s2, 3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
@@ -265,38 +243,26 @@ define amdgpu_kernel void @test_bitcast_llc_v64i16_v8i16(ptr addrspace(1) %out,
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX11-NEXT: s_mov_b32 m0, s2
|
||||
; GFX11-NEXT: s_lshl_b32 m0, s2, 2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v3, v3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v2, v2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s2, 3
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v3, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s2, 2
|
||||
; GFX11-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v2, v0
|
||||
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-LABEL: test_bitcast_llc_v64i16_v8i16:
|
||||
; GFX12: ; %bb.0: ; %entry
|
||||
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
|
||||
; GFX12-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_add_co_i32 s2, s2, s2
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX12-NEXT: s_mov_b32 m0, s2
|
||||
; GFX12-NEXT: s_lshl_b32 m0, s2, 2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v3, v3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v2, v2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s2, 3
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v3, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s2, 2
|
||||
; GFX12-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v2, v0
|
||||
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
@@ -315,20 +281,13 @@ define amdgpu_kernel void @test_bitcast_llc_v32i32_v4i32(ptr addrspace(1) %out,
|
||||
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s2, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v1
|
||||
; GFX9-NEXT: s_add_i32 s3, s2, 3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
@@ -337,38 +296,26 @@ define amdgpu_kernel void @test_bitcast_llc_v32i32_v4i32(ptr addrspace(1) %out,
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX11-NEXT: s_mov_b32 m0, s2
|
||||
; GFX11-NEXT: s_lshl_b32 m0, s2, 2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v3, v3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v2, v2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s2, 3
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v3, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s2, 2
|
||||
; GFX11-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v2, v0
|
||||
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-LABEL: test_bitcast_llc_v32i32_v4i32:
|
||||
; GFX12: ; %bb.0: ; %entry
|
||||
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
|
||||
; GFX12-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_add_co_i32 s2, s2, s2
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX12-NEXT: s_mov_b32 m0, s2
|
||||
; GFX12-NEXT: s_lshl_b32 m0, s2, 2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v3, v3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v2, v2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s2, 3
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v3, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s2, 2
|
||||
; GFX12-NEXT: v_mov_b32_e32 v4, 0
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v2, v0
|
||||
; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
@@ -387,104 +334,59 @@ define amdgpu_kernel void @test_bitcast_llc_v16i64_v4i256(ptr addrspace(1) %out,
|
||||
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v8, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX9-NEXT: s_add_i32 s3, s2, 1
|
||||
; GFX9-NEXT: s_add_i32 s3, s3, s3
|
||||
; GFX9-NEXT: s_lshl_b32 s3, s3, 1
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s2, 3
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v2
|
||||
; GFX9-NEXT: v_mov_b32_e32 v1, v1
|
||||
; GFX9-NEXT: s_add_i32 s4, s3, 3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, v0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v7, v7
|
||||
; GFX9-NEXT: v_mov_b32_e32 v6, v6
|
||||
; GFX9-NEXT: v_mov_b32_e32 v5, v5
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, v4
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s5, s3, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s4, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s5, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v5, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s3, s2, 3
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v7, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: s_add_i32 s2, s2, 2
|
||||
; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v6, v0
|
||||
; GFX9-NEXT: s_set_gpr_idx_off
|
||||
; GFX9-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
|
||||
; GFX9-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
|
||||
; GFX9-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16
|
||||
; GFX9-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: test_bitcast_llc_v16i64_v4i256:
|
||||
; GFX11: ; %bb.0: ; %entry
|
||||
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x8
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_add_i32 s2, s0, s0
|
||||
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
|
||||
; GFX11-NEXT: s_add_i32 s0, s2, 1
|
||||
; GFX11-NEXT: s_add_i32 s2, s2, s2
|
||||
; GFX11-NEXT: s_add_i32 s0, s0, s0
|
||||
; GFX11-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX11-NEXT: s_lshl_b32 s3, s0, 1
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
||||
; GFX11-NEXT: s_mov_b32 m0, s3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s3, 3
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v3, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s3, 2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v2, v0
|
||||
; GFX11-NEXT: s_mov_b32 m0, s2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v5, v1
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v4, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s2, 3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v7, v0
|
||||
; GFX11-NEXT: s_add_i32 m0, s2, 2
|
||||
; GFX11-NEXT: v_mov_b32_e32 v8, 0
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v6, v0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1]
|
||||
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
|
||||
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
|
||||
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v8, 0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_lshl_b32 m0, s2, 3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v3, v3
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v2, v2
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v7, v7
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v6, v6
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v5, v5
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v4, v4
|
||||
; GFX11-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX11-NEXT: s_clause 0x1
|
||||
; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
|
||||
; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-LABEL: test_bitcast_llc_v16i64_v4i256:
|
||||
; GFX12: ; %bb.0: ; %entry
|
||||
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_add_co_i32 s2, s2, s2
|
||||
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
|
||||
; GFX12-NEXT: s_add_co_i32 s3, s2, 1
|
||||
; GFX12-NEXT: s_add_co_i32 s2, s2, s2
|
||||
; GFX12-NEXT: s_add_co_i32 s3, s3, s3
|
||||
; GFX12-NEXT: s_lshl_b32 s2, s2, 1
|
||||
; GFX12-NEXT: s_lshl_b32 s3, s3, 1
|
||||
; GFX12-NEXT: s_mov_b32 m0, s3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s3, 3
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v3, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s3, 2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v2, v0
|
||||
; GFX12-NEXT: s_mov_b32 m0, s2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v5, v1
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v4, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s2, 3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v7, v0
|
||||
; GFX12-NEXT: s_add_co_i32 m0, s2, 2
|
||||
; GFX12-NEXT: v_mov_b32_e32 v8, 0
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v6, v0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_lshl_b32 m0, s2, 3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v3, v3
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v2, v2
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v1, v1
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v7, v7
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v6, v6
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v5, v5
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v4, v4
|
||||
; GFX12-NEXT: v_movrels_b32_e32 v0, v0
|
||||
; GFX12-NEXT: s_clause 0x1
|
||||
; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1]
|
||||
; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16
|
||||
; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16
|
||||
; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1]
|
||||
; GFX12-NEXT: s_endpgm
|
||||
entry:
|
||||
%alloca = freeze <16 x i64> poison
|
||||
|
||||
@@ -39,20 +39,21 @@ define void @dynamicIndex(ptr %addr, ptr %addr2, i32 %index) {
|
||||
; CHECK-NEXT: mov r4, sp
|
||||
; CHECK-NEXT: bfc r4, #0, #4
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; CHECK-NEXT: movs r3, #2
|
||||
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
|
||||
; CHECK-NEXT: adds r0, r2, r2
|
||||
; CHECK-NEXT: and r2, r0, #2
|
||||
; CHECK-NEXT: adds r0, #1
|
||||
; CHECK-NEXT: and.w r0, r3, r2, lsl #1
|
||||
; CHECK-NEXT: mov r12, sp
|
||||
; CHECK-NEXT: and r0, r0, #3
|
||||
; CHECK-NEXT: lsls r2, r2, #2
|
||||
; CHECK-NEXT: mov r3, r12
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128], r2
|
||||
; CHECK-NEXT: orr.w r0, r12, r0, lsl #2
|
||||
; CHECK-NEXT: sub.w r4, r7, #8
|
||||
; CHECK-NEXT: lsls r0, r0, #2
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128], r0
|
||||
; CHECK-NEXT: lsls r0, r2, #1
|
||||
; CHECK-NEXT: adds r0, #1
|
||||
; CHECK-NEXT: and r0, r0, #3
|
||||
; CHECK-NEXT: ldr r2, [r3]
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vldr d18, [r1]
|
||||
; CHECK-NEXT: orr.w r0, r12, r0, lsl #2
|
||||
; CHECK-NEXT: ldr r0, [r0]
|
||||
; CHECK-NEXT: vmov d16, r2, r0
|
||||
; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18
|
||||
; CHECK-NEXT: vstr d16, [r1]
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
define i8 @baz(ptr %ptr, i32 %arg) {
|
||||
; CHECK-LABEL: baz:
|
||||
; CHECK: @ %bb.0: @ %bb
|
||||
; CHECK-NEXT: add r1, r1, r1
|
||||
; CHECK-NEXT: and r1, r1, #2
|
||||
; CHECK-NEXT: mov r2, #2
|
||||
; CHECK-NEXT: and r1, r2, r1, lsl #1
|
||||
; CHECK-NEXT: ldr r0, [r0, r1, lsl #2]
|
||||
; CHECK-NEXT: bx lr
|
||||
bb:
|
||||
|
||||
@@ -168,7 +168,7 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
|
||||
; LA32-LABEL: extract_4xi64_idx:
|
||||
; LA32: # %bb.0:
|
||||
; LA32-NEXT: xvld $xr0, $a0, 0
|
||||
; LA32-NEXT: add.w $a0, $a2, $a2
|
||||
; LA32-NEXT: slli.w $a0, $a2, 1
|
||||
; LA32-NEXT: addi.w $a2, $a0, 1
|
||||
; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
|
||||
; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1
|
||||
|
||||
@@ -268,7 +268,7 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
|
||||
; LA32: # %bb.0:
|
||||
; LA32-NEXT: pcalau12i $a5, %pc_hi20(.LCPI15_0)
|
||||
; LA32-NEXT: xvld $xr0, $a5, %pc_lo12(.LCPI15_0)
|
||||
; LA32-NEXT: add.w $a4, $a4, $a4
|
||||
; LA32-NEXT: slli.w $a4, $a4, 1
|
||||
; LA32-NEXT: xvld $xr1, $a0, 0
|
||||
; LA32-NEXT: xvreplgr2vr.w $xr2, $a4
|
||||
; LA32-NEXT: xvseq.w $xr2, $xr2, $xr0
|
||||
|
||||
@@ -167,7 +167,7 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
|
||||
; LA32-LABEL: extract_2xi64_idx:
|
||||
; LA32: # %bb.0:
|
||||
; LA32-NEXT: vld $vr0, $a0, 0
|
||||
; LA32-NEXT: add.w $a0, $a2, $a2
|
||||
; LA32-NEXT: slli.w $a0, $a2, 1
|
||||
; LA32-NEXT: addi.w $a2, $a0, 1
|
||||
; LA32-NEXT: vreplve.w $vr1, $vr0, $a2
|
||||
; LA32-NEXT: vreplve.w $vr0, $vr0, $a0
|
||||
|
||||
@@ -188,7 +188,7 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
|
||||
; LA32: # %bb.0:
|
||||
; LA32-NEXT: pcalau12i $a5, %pc_hi20(.LCPI9_0)
|
||||
; LA32-NEXT: vld $vr0, $a5, %pc_lo12(.LCPI9_0)
|
||||
; LA32-NEXT: add.w $a4, $a4, $a4
|
||||
; LA32-NEXT: slli.w $a4, $a4, 1
|
||||
; LA32-NEXT: vld $vr1, $a0, 0
|
||||
; LA32-NEXT: vreplgr2vr.w $vr2, $a4
|
||||
; LA32-NEXT: vseq.w $vr2, $vr2, $vr0
|
||||
|
||||
@@ -1442,8 +1442,8 @@ define i64 @extract_sext_v2i64_vidx() nounwind {
|
||||
; O32-BE-NEXT: addu $1, $2, $25
|
||||
; O32-BE-NEXT: lw $2, %got(i32)($1)
|
||||
; O32-BE-NEXT: lw $2, 0($2)
|
||||
; O32-BE-NEXT: addu $2, $2, $2
|
||||
; O32-BE-NEXT: addiu $3, $2, 1
|
||||
; O32-BE-NEXT: sll $2, $2, 1
|
||||
; O32-BE-NEXT: ori $3, $2, 1
|
||||
; O32-BE-NEXT: lw $1, %got(v2i64)($1)
|
||||
; O32-BE-NEXT: ld.d $w0, 0($1)
|
||||
; O32-BE-NEXT: addv.d $w0, $w0, $w0
|
||||
@@ -1461,8 +1461,8 @@ define i64 @extract_sext_v2i64_vidx() nounwind {
|
||||
; O32-LE-NEXT: addu $1, $2, $25
|
||||
; O32-LE-NEXT: lw $2, %got(i32)($1)
|
||||
; O32-LE-NEXT: lw $2, 0($2)
|
||||
; O32-LE-NEXT: addu $2, $2, $2
|
||||
; O32-LE-NEXT: addiu $3, $2, 1
|
||||
; O32-LE-NEXT: sll $2, $2, 1
|
||||
; O32-LE-NEXT: ori $3, $2, 1
|
||||
; O32-LE-NEXT: lw $1, %got(v2i64)($1)
|
||||
; O32-LE-NEXT: ld.d $w0, 0($1)
|
||||
; O32-LE-NEXT: addv.d $w0, $w0, $w0
|
||||
@@ -1669,8 +1669,8 @@ define i64 @extract_zext_v2i64_vidx() nounwind {
|
||||
; O32-BE-NEXT: addu $1, $2, $25
|
||||
; O32-BE-NEXT: lw $2, %got(i32)($1)
|
||||
; O32-BE-NEXT: lw $2, 0($2)
|
||||
; O32-BE-NEXT: addu $2, $2, $2
|
||||
; O32-BE-NEXT: addiu $3, $2, 1
|
||||
; O32-BE-NEXT: sll $2, $2, 1
|
||||
; O32-BE-NEXT: ori $3, $2, 1
|
||||
; O32-BE-NEXT: lw $1, %got(v2i64)($1)
|
||||
; O32-BE-NEXT: ld.d $w0, 0($1)
|
||||
; O32-BE-NEXT: addv.d $w0, $w0, $w0
|
||||
@@ -1688,8 +1688,8 @@ define i64 @extract_zext_v2i64_vidx() nounwind {
|
||||
; O32-LE-NEXT: addu $1, $2, $25
|
||||
; O32-LE-NEXT: lw $2, %got(i32)($1)
|
||||
; O32-LE-NEXT: lw $2, 0($2)
|
||||
; O32-LE-NEXT: addu $2, $2, $2
|
||||
; O32-LE-NEXT: addiu $3, $2, 1
|
||||
; O32-LE-NEXT: sll $2, $2, 1
|
||||
; O32-LE-NEXT: ori $3, $2, 1
|
||||
; O32-LE-NEXT: lw $1, %got(v2i64)($1)
|
||||
; O32-LE-NEXT: ld.d $w0, 0($1)
|
||||
; O32-LE-NEXT: addv.d $w0, $w0, $w0
|
||||
|
||||
@@ -165,14 +165,14 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
|
||||
;
|
||||
; CHECK-32-LABEL: testDoubleword:
|
||||
; CHECK-32: # %bb.0: # %entry
|
||||
; CHECK-32-NEXT: add 5, 6, 6
|
||||
; CHECK-32-NEXT: addi 7, 1, -32
|
||||
; CHECK-32-NEXT: rlwinm 5, 6, 3, 28, 28
|
||||
; CHECK-32-NEXT: stxv 34, -32(1)
|
||||
; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 28
|
||||
; CHECK-32-NEXT: stwx 3, 7, 6
|
||||
; CHECK-32-NEXT: addi 3, 5, 1
|
||||
; CHECK-32-NEXT: stwx 3, 7, 5
|
||||
; CHECK-32-NEXT: slwi 3, 6, 1
|
||||
; CHECK-32-NEXT: addi 5, 1, -16
|
||||
; CHECK-32-NEXT: lxv 0, -32(1)
|
||||
; CHECK-32-NEXT: addi 3, 3, 1
|
||||
; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
|
||||
; CHECK-32-NEXT: stxv 0, -16(1)
|
||||
; CHECK-32-NEXT: stwx 4, 5, 3
|
||||
@@ -187,8 +187,8 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
|
||||
;
|
||||
; CHECK-32-P10-LABEL: testDoubleword:
|
||||
; CHECK-32-P10: # %bb.0: # %entry
|
||||
; CHECK-32-P10-NEXT: add 5, 6, 6
|
||||
; CHECK-32-P10-NEXT: slwi 6, 5, 2
|
||||
; CHECK-32-P10-NEXT: slwi 5, 6, 1
|
||||
; CHECK-32-P10-NEXT: slwi 6, 6, 3
|
||||
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
|
||||
; CHECK-32-P10-NEXT: addi 3, 5, 1
|
||||
; CHECK-32-P10-NEXT: slwi 3, 3, 2
|
||||
|
||||
@@ -990,14 +990,15 @@ entry:
|
||||
define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
|
||||
; CHECK-LABEL: getvelsl:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: add 5, 3, 3
|
||||
; CHECK-NEXT: rlwinm 5, 3, 3, 28, 28
|
||||
; CHECK-NEXT: slwi 3, 3, 1
|
||||
; CHECK-NEXT: addi 4, 1, -16
|
||||
; CHECK-NEXT: rlwinm 3, 5, 2, 28, 28
|
||||
; CHECK-NEXT: addi 5, 5, 1
|
||||
; CHECK-NEXT: addi 3, 3, 1
|
||||
; CHECK-NEXT: stxvw4x 34, 0, 4
|
||||
; CHECK-NEXT: rlwinm 5, 5, 2, 28, 29
|
||||
; CHECK-NEXT: lwzx 3, 4, 3
|
||||
; CHECK-NEXT: lwzx 4, 4, 5
|
||||
; CHECK-NEXT: lwzx 5, 4, 5
|
||||
; CHECK-NEXT: rlwinm 3, 3, 2, 28, 29
|
||||
; CHECK-NEXT: lwzx 4, 4, 3
|
||||
; CHECK-NEXT: mr 3, 5
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <2 x i64> %vsl, i32 %i
|
||||
@@ -1008,14 +1009,15 @@ entry:
|
||||
define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
|
||||
; CHECK-LABEL: getvelul:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: add 5, 3, 3
|
||||
; CHECK-NEXT: rlwinm 5, 3, 3, 28, 28
|
||||
; CHECK-NEXT: slwi 3, 3, 1
|
||||
; CHECK-NEXT: addi 4, 1, -16
|
||||
; CHECK-NEXT: rlwinm 3, 5, 2, 28, 28
|
||||
; CHECK-NEXT: addi 5, 5, 1
|
||||
; CHECK-NEXT: addi 3, 3, 1
|
||||
; CHECK-NEXT: stxvw4x 34, 0, 4
|
||||
; CHECK-NEXT: rlwinm 5, 5, 2, 28, 29
|
||||
; CHECK-NEXT: lwzx 3, 4, 3
|
||||
; CHECK-NEXT: lwzx 4, 4, 5
|
||||
; CHECK-NEXT: lwzx 5, 4, 5
|
||||
; CHECK-NEXT: rlwinm 3, 3, 2, 28, 29
|
||||
; CHECK-NEXT: lwzx 4, 4, 3
|
||||
; CHECK-NEXT: mr 3, 5
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%vecext = extractelement <2 x i64> %vul, i32 %i
|
||||
|
||||
@@ -241,12 +241,12 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
|
||||
;
|
||||
; AIX-P8-32-LABEL: testDoubleword:
|
||||
; AIX-P8-32: # %bb.0: # %entry
|
||||
; AIX-P8-32-NEXT: add r6, r6, r6
|
||||
; AIX-P8-32-NEXT: addi r5, r1, -16
|
||||
; AIX-P8-32-NEXT: rlwinm r7, r6, 2, 28, 28
|
||||
; AIX-P8-32-NEXT: rlwinm r7, r6, 3, 28, 28
|
||||
; AIX-P8-32-NEXT: stxvd2x v2, 0, r5
|
||||
; AIX-P8-32-NEXT: stwx r3, r5, r7
|
||||
; AIX-P8-32-NEXT: addi r3, r6, 1
|
||||
; AIX-P8-32-NEXT: slwi r3, r6, 1
|
||||
; AIX-P8-32-NEXT: addi r3, r3, 1
|
||||
; AIX-P8-32-NEXT: rlwinm r3, r3, 2, 28, 29
|
||||
; AIX-P8-32-NEXT: stwx r4, r5, r3
|
||||
; AIX-P8-32-NEXT: lxvd2x v2, 0, r5
|
||||
|
||||
@@ -621,7 +621,7 @@ define i64 @extractelt_v3i64_idx(<3 x i64> %a, i32 zeroext %idx) nounwind {
|
||||
; RV32: # %bb.0:
|
||||
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
|
||||
; RV32-NEXT: vadd.vv v8, v8, v8
|
||||
; RV32-NEXT: add a0, a0, a0
|
||||
; RV32-NEXT: slli a0, a0, 1
|
||||
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
|
||||
; RV32-NEXT: vslidedown.vx v10, v8, a0
|
||||
; RV32-NEXT: addi a1, a0, 1
|
||||
|
||||
@@ -1013,14 +1013,13 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
|
||||
; X86AVX2-NEXT: movl 8(%ebp), %ecx
|
||||
; X86AVX2-NEXT: movl 12(%ebp), %edx
|
||||
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
|
||||
; X86AVX2-NEXT: leal (%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $2, %esi
|
||||
; X86AVX2-NEXT: movl %ecx, (%esp,%esi,4)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $1, %eax
|
||||
; X86AVX2-NEXT: movl %ecx, (%esp,%eax,8)
|
||||
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
|
||||
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
|
||||
; X86AVX2-NEXT: andl $3, %eax
|
||||
; X86AVX2-NEXT: movl %edx, 16(%esp,%eax,4)
|
||||
; X86AVX2-NEXT: andl $3, %esi
|
||||
; X86AVX2-NEXT: movl %edx, 16(%esp,%esi,4)
|
||||
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
|
||||
; X86AVX2-NEXT: leal -4(%ebp), %esp
|
||||
; X86AVX2-NEXT: popl %esi
|
||||
@@ -1362,14 +1361,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
|
||||
; X86AVX2-NEXT: movl (%ecx), %edx
|
||||
; X86AVX2-NEXT: movl 4(%ecx), %ecx
|
||||
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
|
||||
; X86AVX2-NEXT: leal (%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $2, %esi
|
||||
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $1, %eax
|
||||
; X86AVX2-NEXT: movl %edx, (%esp,%eax,8)
|
||||
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
|
||||
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
|
||||
; X86AVX2-NEXT: andl $3, %eax
|
||||
; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
|
||||
; X86AVX2-NEXT: andl $3, %esi
|
||||
; X86AVX2-NEXT: movl %ecx, 16(%esp,%esi,4)
|
||||
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
|
||||
; X86AVX2-NEXT: leal -4(%ebp), %esp
|
||||
; X86AVX2-NEXT: popl %esi
|
||||
@@ -1746,14 +1744,13 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
|
||||
; X86AVX2-NEXT: movl 8(%ebp), %ecx
|
||||
; X86AVX2-NEXT: movl 12(%ebp), %edx
|
||||
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
|
||||
; X86AVX2-NEXT: leal (%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $6, %esi
|
||||
; X86AVX2-NEXT: movl %ecx, (%esp,%esi,4)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $3, %eax
|
||||
; X86AVX2-NEXT: movl %ecx, (%esp,%eax,8)
|
||||
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
|
||||
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
|
||||
; X86AVX2-NEXT: andl $7, %eax
|
||||
; X86AVX2-NEXT: movl %edx, 32(%esp,%eax,4)
|
||||
; X86AVX2-NEXT: andl $7, %esi
|
||||
; X86AVX2-NEXT: movl %edx, 32(%esp,%esi,4)
|
||||
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
|
||||
; X86AVX2-NEXT: leal -4(%ebp), %esp
|
||||
; X86AVX2-NEXT: popl %esi
|
||||
@@ -2128,14 +2125,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
|
||||
; X86AVX2-NEXT: movl (%ecx), %edx
|
||||
; X86AVX2-NEXT: movl 4(%ecx), %ecx
|
||||
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
|
||||
; X86AVX2-NEXT: leal (%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $6, %esi
|
||||
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
|
||||
; X86AVX2-NEXT: andl $3, %eax
|
||||
; X86AVX2-NEXT: movl %edx, (%esp,%eax,8)
|
||||
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
|
||||
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||
; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
|
||||
; X86AVX2-NEXT: andl $7, %eax
|
||||
; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
|
||||
; X86AVX2-NEXT: andl $7, %esi
|
||||
; X86AVX2-NEXT: movl %ecx, 32(%esp,%esi,4)
|
||||
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
|
||||
; X86AVX2-NEXT: leal -4(%ebp), %esp
|
||||
; X86AVX2-NEXT: popl %esi
|
||||
|
||||
@@ -11,7 +11,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
|
||||
; X86-SSE-NEXT: movl %esp, %ebp
|
||||
; X86-SSE-NEXT: andl $-16, %esp
|
||||
; X86-SSE-NEXT: subl $272, %esp # imm = 0x110
|
||||
; X86-SSE-NEXT: movl 88(%ebp), %ecx
|
||||
; X86-SSE-NEXT: movl 88(%ebp), %eax
|
||||
; X86-SSE-NEXT: movdqa 72(%ebp), %xmm0
|
||||
; X86-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; X86-SSE-NEXT: xorps %xmm1, %xmm1
|
||||
@@ -31,10 +31,9 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
|
||||
; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: movaps %xmm1, (%esp)
|
||||
; X86-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
|
||||
; X86-SSE-NEXT: leal (%ecx,%ecx), %eax
|
||||
; X86-SSE-NEXT: andl $31, %eax
|
||||
; X86-SSE-NEXT: movl 128(%esp,%eax,4), %eax
|
||||
; X86-SSE-NEXT: leal 1(%ecx,%ecx), %ecx
|
||||
; X86-SSE-NEXT: leal 1(%eax,%eax), %ecx
|
||||
; X86-SSE-NEXT: andl $15, %eax
|
||||
; X86-SSE-NEXT: movl 128(%esp,%eax,8), %eax
|
||||
; X86-SSE-NEXT: andl $31, %ecx
|
||||
; X86-SSE-NEXT: movl (%esp,%ecx,4), %edx
|
||||
; X86-SSE-NEXT: movl %ebp, %esp
|
||||
@@ -66,7 +65,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
|
||||
; X86-AVX-NEXT: movl %esp, %ebp
|
||||
; X86-AVX-NEXT: andl $-32, %esp
|
||||
; X86-AVX-NEXT: subl $288, %esp # imm = 0x120
|
||||
; X86-AVX-NEXT: movl 40(%ebp), %ecx
|
||||
; X86-AVX-NEXT: movl 40(%ebp), %eax
|
||||
; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
||||
; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
@@ -77,10 +76,9 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
|
||||
; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
|
||||
; X86-AVX-NEXT: vmovaps %ymm1, (%esp)
|
||||
; X86-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
|
||||
; X86-AVX-NEXT: leal (%ecx,%ecx), %eax
|
||||
; X86-AVX-NEXT: andl $31, %eax
|
||||
; X86-AVX-NEXT: movl 128(%esp,%eax,4), %eax
|
||||
; X86-AVX-NEXT: leal 1(%ecx,%ecx), %ecx
|
||||
; X86-AVX-NEXT: leal 1(%eax,%eax), %ecx
|
||||
; X86-AVX-NEXT: andl $15, %eax
|
||||
; X86-AVX-NEXT: movl 128(%esp,%eax,8), %eax
|
||||
; X86-AVX-NEXT: andl $31, %ecx
|
||||
; X86-AVX-NEXT: movl (%esp,%ecx,4), %edx
|
||||
; X86-AVX-NEXT: movl %ebp, %esp
|
||||
|
||||
Reference in New Issue
Block a user