diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 26036207c4b8..0cff883f8318 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -231,8 +231,8 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); - - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx, + DAG.getShiftAmountConstant(1, Idx.getValueType(), dl)); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, @@ -446,12 +446,12 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { std::swap(Lo, Hi); SDValue Idx = N->getOperand(2); - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx, + DAG.getShiftAmountConstant(1, Idx.getValueType(), dl)); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); - Idx = DAG.getNode(ISD::ADD, dl, - Idx.getValueType(), Idx, + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, DAG.getConstant(1, dl, Idx.getValueType())); - NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll index 641701beeda4..facee0829611 100644 --- a/llvm/test/CodeGen/AArch64/insertextract.ll +++ b/llvm/test/CodeGen/AArch64/insertextract.ll @@ -2856,23 +2856,23 @@ define i128 @extract_v2i128_c(<2 x i128> %a, i32 %c) { ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: adds x9, x0, x0 -; CHECK-SD-NEXT: mov w8, w4 +; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: // kill: def $w4 killed $w4 def $x4 ; CHECK-SD-NEXT: adc x10, x1, x1 ; CHECK-SD-NEXT: adds x11, x2, x2 ; CHECK-SD-NEXT: fmov d1, x9 ; CHECK-SD-NEXT: fmov d0, x11 -; CHECK-SD-NEXT: adc x12, x3, x3 -; CHECK-SD-NEXT: add x8, x8, x8 -; CHECK-SD-NEXT: and x9, x8, #0x2 -; CHECK-SD-NEXT: orr w8, w8, #0x1 -; CHECK-SD-NEXT: mov x11, sp +; CHECK-SD-NEXT: adc x11, x3, x3 +; CHECK-SD-NEXT: orr w8, w8, w4, lsl #1 +; CHECK-SD-NEXT: ubfiz x9, x4, #4, #1 ; CHECK-SD-NEXT: mov v1.d[1], x10 ; CHECK-SD-NEXT: add x10, sp, #32 ; CHECK-SD-NEXT: and x8, x8, #0x3 -; CHECK-SD-NEXT: mov v0.d[1], x12 +; CHECK-SD-NEXT: mov v0.d[1], x11 +; CHECK-SD-NEXT: mov x11, sp ; CHECK-SD-NEXT: stp q1, q0, [sp] ; CHECK-SD-NEXT: stp q1, q0, [sp, #32] -; CHECK-SD-NEXT: ldr x0, [x10, x9, lsl #3] +; CHECK-SD-NEXT: ldr x0, [x10, x9] ; CHECK-SD-NEXT: ldr x1, [x11, x8, lsl #3] ; CHECK-SD-NEXT: add sp, sp, #64 ; CHECK-SD-NEXT: ret diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-dynamic-idx-bitcasts-llc.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-dynamic-idx-bitcasts-llc.ll index 084b7a2d59b2..ab6597363825 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-dynamic-idx-bitcasts-llc.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-dynamic-idx-bitcasts-llc.ll @@ -6,18 +6,15 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, i32 %idx) { ; GFX9-LABEL: test_bitcast_llc_v128i8_v16i8: ; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_lshl_b32 s0, s0, 8 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 ; GFX9-NEXT: s_load_dword s33, s[4:5], 0x8 -; GFX9-NEXT: s_lshl_b32 s0, s0, 8 ; GFX9-NEXT: s_and_b32 s1, s0, 0xff ; GFX9-NEXT: s_or_b32 s0, s1, s0 ; GFX9-NEXT: s_and_b32 s1, s0, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9-NEXT: s_or_b32 s0, s1, s0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_i32 s33, s33, s33 ; GFX9-NEXT: s_mov_b32 s1, s0 -; GFX9-NEXT: s_lshl_b32 s33, s33, 1 ; GFX9-NEXT: s_mov_b32 s2, s0 ; GFX9-NEXT: s_mov_b32 s3, s0 ; GFX9-NEXT: s_mov_b32 s4, s0 @@ -48,7 +45,8 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, ; GFX9-NEXT: s_mov_b32 s29, s0 ; GFX9-NEXT: s_mov_b32 s30, s0 ; GFX9-NEXT: s_mov_b32 s31, s0 -; GFX9-NEXT: s_add_i32 s36, s33, 3 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_lshl_b32 s33, s33, 2 ; GFX9-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX9-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX9-NEXT: v_mov_b64_e32 v[4:5], s[4:5] @@ -65,12 +63,9 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, ; GFX9-NEXT: v_mov_b64_e32 v[26:27], s[26:27] ; GFX9-NEXT: v_mov_b64_e32 v[28:29], s[28:29] ; GFX9-NEXT: v_mov_b64_e32 v[30:31], s[30:31] -; GFX9-NEXT: s_set_gpr_idx_on s36, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v35, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s0, s33, 2 -; GFX9-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v34, v0 +; GFX9-NEXT: s_set_gpr_idx_on s33, gpr_idx(SRC0) +; GFX9-NEXT: v_mov_b32_e32 v35, v3 +; GFX9-NEXT: v_mov_b32_e32 v34, v2 ; GFX9-NEXT: s_set_gpr_idx_off ; GFX9-NEXT: v_mov_b32_e32 v36, 0 ; GFX9-NEXT: s_set_gpr_idx_on s33, gpr_idx(SRC0) @@ -125,10 +120,11 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, ; GFX11-NEXT: s_mov_b32 s29, s0 ; GFX11-NEXT: s_mov_b32 s30, s0 ; GFX11-NEXT: s_mov_b32 s31, s0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s33, s33, s33 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_lshl_b32 m0, s33, 2 +; GFX11-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31 ; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 ; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 ; GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 @@ -142,14 +138,8 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, ; GFX11-NEXT: v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25 ; GFX11-NEXT: v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27 ; GFX11-NEXT: v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29 -; GFX11-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31 -; GFX11-NEXT: s_lshl_b32 s0, s33, 1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 m0, s0, 3 -; GFX11-NEXT: v_movrels_b32_e32 v34, v0 -; GFX11-NEXT: s_add_i32 m0, s0, 2 -; GFX11-NEXT: v_movrels_b32_e32 v33, v0 -; GFX11-NEXT: s_mov_b32 m0, s0 +; GFX11-NEXT: v_movrels_b32_e32 v34, v3 +; GFX11-NEXT: v_movrels_b32_e32 v33, v2 ; GFX11-NEXT: v_movrels_b32_e32 v32, v1 ; GFX11-NEXT: v_movrels_b32_e32 v31, v0 ; GFX11-NEXT: global_store_b128 v35, v[31:34], s[34:35] @@ -198,10 +188,11 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, ; GFX12-NEXT: s_mov_b32 s29, s0 ; GFX12-NEXT: s_mov_b32 s30, s0 ; GFX12-NEXT: s_mov_b32 s31, s0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_add_co_i32 s33, s38, s38 ; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_lshl_b32 m0, s38, 2 +; GFX12-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31 ; GFX12-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 ; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 ; GFX12-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 @@ -215,14 +206,8 @@ define amdgpu_kernel void @test_bitcast_llc_v128i8_v16i8(ptr addrspace(1) %out, ; GFX12-NEXT: v_dual_mov_b32 v24, s24 :: v_dual_mov_b32 v25, s25 ; GFX12-NEXT: v_dual_mov_b32 v26, s26 :: v_dual_mov_b32 v27, s27 ; GFX12-NEXT: v_dual_mov_b32 v28, s28 :: v_dual_mov_b32 v29, s29 -; GFX12-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31 -; GFX12-NEXT: s_lshl_b32 s0, s33, 1 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_add_co_i32 m0, s0, 3 -; GFX12-NEXT: v_movrels_b32_e32 v34, v0 -; GFX12-NEXT: s_add_co_i32 m0, s0, 2 -; GFX12-NEXT: v_movrels_b32_e32 v33, v0 -; GFX12-NEXT: s_mov_b32 m0, s0 +; GFX12-NEXT: v_movrels_b32_e32 v34, v3 +; GFX12-NEXT: v_movrels_b32_e32 v33, v2 ; GFX12-NEXT: v_movrels_b32_e32 v32, v1 ; GFX12-NEXT: v_movrels_b32_e32 v31, v0 ; GFX12-NEXT: global_store_b128 v35, v[31:34], s[36:37] @@ -243,20 +228,13 @@ define amdgpu_kernel void @test_bitcast_llc_v64i16_v8i16(ptr addrspace(1) %out, ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_i32 s2, s2, s2 -; GFX9-NEXT: s_lshl_b32 s2, s2, 1 +; GFX9-NEXT: s_lshl_b32 s2, s2, 2 ; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GFX9-NEXT: v_mov_b32_e32 v3, v3 +; GFX9-NEXT: v_mov_b32_e32 v2, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, v1 -; GFX9-NEXT: s_add_i32 s3, s2, 3 ; GFX9-NEXT: v_mov_b32_e32 v0, v0 ; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s2, s2, 2 -; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-NEXT: s_set_gpr_idx_off ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm ; @@ -265,38 +243,26 @@ define amdgpu_kernel void @test_bitcast_llc_v64i16_v8i16(ptr addrspace(1) %out, ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s2, s2, s2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_lshl_b32 s2, s2, 1 -; GFX11-NEXT: s_mov_b32 m0, s2 +; GFX11-NEXT: s_lshl_b32 m0, s2, 2 +; GFX11-NEXT: v_movrels_b32_e32 v3, v3 +; GFX11-NEXT: v_movrels_b32_e32 v2, v2 ; GFX11-NEXT: v_movrels_b32_e32 v1, v1 ; GFX11-NEXT: v_movrels_b32_e32 v0, v0 -; GFX11-NEXT: s_add_i32 m0, s2, 3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_movrels_b32_e32 v3, v0 -; GFX11-NEXT: s_add_i32 m0, s2, 2 -; GFX11-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-NEXT: v_movrels_b32_e32 v2, v0 ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: test_bitcast_llc_v64i16_v8i16: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 +; GFX12-NEXT: v_mov_b32_e32 v4, 0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_add_co_i32 s2, s2, s2 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_lshl_b32 s2, s2, 1 -; GFX12-NEXT: s_mov_b32 m0, s2 +; GFX12-NEXT: s_lshl_b32 m0, s2, 2 +; GFX12-NEXT: v_movrels_b32_e32 v3, v3 +; GFX12-NEXT: v_movrels_b32_e32 v2, v2 ; GFX12-NEXT: v_movrels_b32_e32 v1, v1 ; GFX12-NEXT: v_movrels_b32_e32 v0, v0 -; GFX12-NEXT: s_add_co_i32 m0, s2, 3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_movrels_b32_e32 v3, v0 -; GFX12-NEXT: s_add_co_i32 m0, s2, 2 -; GFX12-NEXT: v_mov_b32_e32 v4, 0 -; GFX12-NEXT: v_movrels_b32_e32 v2, v0 ; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX12-NEXT: s_endpgm entry: @@ -315,20 +281,13 @@ define amdgpu_kernel void @test_bitcast_llc_v32i32_v4i32(ptr addrspace(1) %out, ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_i32 s2, s2, s2 -; GFX9-NEXT: s_lshl_b32 s2, s2, 1 +; GFX9-NEXT: s_lshl_b32 s2, s2, 2 ; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GFX9-NEXT: v_mov_b32_e32 v3, v3 +; GFX9-NEXT: v_mov_b32_e32 v2, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, v1 -; GFX9-NEXT: s_add_i32 s3, s2, 3 ; GFX9-NEXT: v_mov_b32_e32 v0, v0 ; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s2, s2, 2 -; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-NEXT: s_set_gpr_idx_off ; GFX9-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm ; @@ -337,38 +296,26 @@ define amdgpu_kernel void @test_bitcast_llc_v32i32_v4i32(ptr addrspace(1) %out, ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v4, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s2, s2, s2 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_lshl_b32 s2, s2, 1 -; GFX11-NEXT: s_mov_b32 m0, s2 +; GFX11-NEXT: s_lshl_b32 m0, s2, 2 +; GFX11-NEXT: v_movrels_b32_e32 v3, v3 +; GFX11-NEXT: v_movrels_b32_e32 v2, v2 ; GFX11-NEXT: v_movrels_b32_e32 v1, v1 ; GFX11-NEXT: v_movrels_b32_e32 v0, v0 -; GFX11-NEXT: s_add_i32 m0, s2, 3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_movrels_b32_e32 v3, v0 -; GFX11-NEXT: s_add_i32 m0, s2, 2 -; GFX11-NEXT: v_mov_b32_e32 v4, 0 -; GFX11-NEXT: v_movrels_b32_e32 v2, v0 ; GFX11-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: test_bitcast_llc_v32i32_v4i32: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 +; GFX12-NEXT: v_mov_b32_e32 v4, 0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_add_co_i32 s2, s2, s2 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_lshl_b32 s2, s2, 1 -; GFX12-NEXT: s_mov_b32 m0, s2 +; GFX12-NEXT: s_lshl_b32 m0, s2, 2 +; GFX12-NEXT: v_movrels_b32_e32 v3, v3 +; GFX12-NEXT: v_movrels_b32_e32 v2, v2 ; GFX12-NEXT: v_movrels_b32_e32 v1, v1 ; GFX12-NEXT: v_movrels_b32_e32 v0, v0 -; GFX12-NEXT: s_add_co_i32 m0, s2, 3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_movrels_b32_e32 v3, v0 -; GFX12-NEXT: s_add_co_i32 m0, s2, 2 -; GFX12-NEXT: v_mov_b32_e32 v4, 0 -; GFX12-NEXT: v_movrels_b32_e32 v2, v0 ; GFX12-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX12-NEXT: s_endpgm entry: @@ -387,104 +334,59 @@ define amdgpu_kernel void @test_bitcast_llc_v16i64_v4i256(ptr addrspace(1) %out, ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_i32 s2, s2, s2 -; GFX9-NEXT: s_add_i32 s3, s2, 1 -; GFX9-NEXT: s_add_i32 s3, s3, s3 -; GFX9-NEXT: s_lshl_b32 s3, s3, 1 -; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0) +; GFX9-NEXT: s_lshl_b32 s2, s2, 3 +; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) +; GFX9-NEXT: v_mov_b32_e32 v3, v3 +; GFX9-NEXT: v_mov_b32_e32 v2, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, v1 -; GFX9-NEXT: s_add_i32 s4, s3, 3 ; GFX9-NEXT: v_mov_b32_e32 v0, v0 +; GFX9-NEXT: v_mov_b32_e32 v7, v7 +; GFX9-NEXT: v_mov_b32_e32 v6, v6 +; GFX9-NEXT: v_mov_b32_e32 v5, v5 +; GFX9-NEXT: v_mov_b32_e32 v4, v4 ; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s5, s3, 2 -; GFX9-NEXT: s_set_gpr_idx_on s4, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s2, s2, s2 -; GFX9-NEXT: s_set_gpr_idx_on s5, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_lshl_b32 s2, s2, 1 -; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-NEXT: v_mov_b32_e32 v4, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s3, s2, 3 -; GFX9-NEXT: s_set_gpr_idx_on s3, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v7, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: s_add_i32 s2, s2, 2 -; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) -; GFX9-NEXT: v_mov_b32_e32 v6, v0 -; GFX9-NEXT: s_set_gpr_idx_off -; GFX9-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] -; GFX9-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 +; GFX9-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX9-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX9-NEXT: s_endpgm ; ; GFX11-LABEL: test_bitcast_llc_v16i64_v4i256: ; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x8 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s2, s0, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_add_i32 s0, s2, 1 -; GFX11-NEXT: s_add_i32 s2, s2, s2 -; GFX11-NEXT: s_add_i32 s0, s0, s0 -; GFX11-NEXT: s_lshl_b32 s2, s2, 1 -; GFX11-NEXT: s_lshl_b32 s3, s0, 1 -; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-NEXT: s_mov_b32 m0, s3 -; GFX11-NEXT: v_movrels_b32_e32 v1, v1 -; GFX11-NEXT: v_movrels_b32_e32 v0, v0 -; GFX11-NEXT: s_add_i32 m0, s3, 3 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_movrels_b32_e32 v3, v0 -; GFX11-NEXT: s_add_i32 m0, s3, 2 -; GFX11-NEXT: v_movrels_b32_e32 v2, v0 -; GFX11-NEXT: s_mov_b32 m0, s2 -; GFX11-NEXT: v_movrels_b32_e32 v5, v1 -; GFX11-NEXT: v_movrels_b32_e32 v4, v0 -; GFX11-NEXT: s_add_i32 m0, s2, 3 -; GFX11-NEXT: v_movrels_b32_e32 v7, v0 -; GFX11-NEXT: s_add_i32 m0, s2, 2 -; GFX11-NEXT: v_mov_b32_e32 v8, 0 -; GFX11-NEXT: v_movrels_b32_e32 v6, v0 -; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] -; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 +; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v8, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_lshl_b32 m0, s2, 3 +; GFX11-NEXT: v_movrels_b32_e32 v3, v3 +; GFX11-NEXT: v_movrels_b32_e32 v2, v2 +; GFX11-NEXT: v_movrels_b32_e32 v1, v1 +; GFX11-NEXT: v_movrels_b32_e32 v7, v7 +; GFX11-NEXT: v_movrels_b32_e32 v6, v6 +; GFX11-NEXT: v_movrels_b32_e32 v5, v5 +; GFX11-NEXT: v_movrels_b32_e32 v4, v4 +; GFX11-NEXT: v_movrels_b32_e32 v0, v0 +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16 +; GFX11-NEXT: global_store_b128 v8, v[0:3], s[0:1] ; GFX11-NEXT: s_endpgm ; ; GFX12-LABEL: test_bitcast_llc_v16i64_v4i256: ; GFX12: ; %bb.0: ; %entry ; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0 -; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_add_co_i32 s2, s2, s2 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_add_co_i32 s3, s2, 1 -; GFX12-NEXT: s_add_co_i32 s2, s2, s2 -; GFX12-NEXT: s_add_co_i32 s3, s3, s3 -; GFX12-NEXT: s_lshl_b32 s2, s2, 1 -; GFX12-NEXT: s_lshl_b32 s3, s3, 1 -; GFX12-NEXT: s_mov_b32 m0, s3 -; GFX12-NEXT: v_movrels_b32_e32 v1, v1 -; GFX12-NEXT: v_movrels_b32_e32 v0, v0 -; GFX12-NEXT: s_add_co_i32 m0, s3, 3 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_movrels_b32_e32 v3, v0 -; GFX12-NEXT: s_add_co_i32 m0, s3, 2 -; GFX12-NEXT: v_movrels_b32_e32 v2, v0 -; GFX12-NEXT: s_mov_b32 m0, s2 -; GFX12-NEXT: v_movrels_b32_e32 v5, v1 -; GFX12-NEXT: v_movrels_b32_e32 v4, v0 -; GFX12-NEXT: s_add_co_i32 m0, s2, 3 -; GFX12-NEXT: v_movrels_b32_e32 v7, v0 -; GFX12-NEXT: s_add_co_i32 m0, s2, 2 ; GFX12-NEXT: v_mov_b32_e32 v8, 0 -; GFX12-NEXT: v_movrels_b32_e32 v6, v0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_lshl_b32 m0, s2, 3 +; GFX12-NEXT: v_movrels_b32_e32 v3, v3 +; GFX12-NEXT: v_movrels_b32_e32 v2, v2 +; GFX12-NEXT: v_movrels_b32_e32 v1, v1 +; GFX12-NEXT: v_movrels_b32_e32 v7, v7 +; GFX12-NEXT: v_movrels_b32_e32 v6, v6 +; GFX12-NEXT: v_movrels_b32_e32 v5, v5 +; GFX12-NEXT: v_movrels_b32_e32 v4, v4 +; GFX12-NEXT: v_movrels_b32_e32 v0, v0 ; GFX12-NEXT: s_clause 0x1 -; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] -; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 +; GFX12-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:16 +; GFX12-NEXT: global_store_b128 v8, v[0:3], s[0:1] ; GFX12-NEXT: s_endpgm entry: %alloca = freeze <16 x i64> poison diff --git a/llvm/test/CodeGen/ARM/combine-vmovdrr.ll b/llvm/test/CodeGen/ARM/combine-vmovdrr.ll index 31c55cf0a074..83cbd5902f21 100644 --- a/llvm/test/CodeGen/ARM/combine-vmovdrr.ll +++ b/llvm/test/CodeGen/ARM/combine-vmovdrr.ll @@ -39,20 +39,21 @@ define void @dynamicIndex(ptr %addr, ptr %addr2, i32 %index) { ; CHECK-NEXT: mov r4, sp ; CHECK-NEXT: bfc r4, #0, #4 ; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: movs r3, #2 ; CHECK-NEXT: vld1.64 {d16, d17}, [r0] -; CHECK-NEXT: adds r0, r2, r2 -; CHECK-NEXT: and r2, r0, #2 -; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: and.w r0, r3, r2, lsl #1 ; CHECK-NEXT: mov r12, sp -; CHECK-NEXT: and r0, r0, #3 -; CHECK-NEXT: lsls r2, r2, #2 ; CHECK-NEXT: mov r3, r12 -; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128], r2 -; CHECK-NEXT: orr.w r0, r12, r0, lsl #2 ; CHECK-NEXT: sub.w r4, r7, #8 +; CHECK-NEXT: lsls r0, r0, #2 +; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128], r0 +; CHECK-NEXT: lsls r0, r2, #1 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: and r0, r0, #3 ; CHECK-NEXT: ldr r2, [r3] -; CHECK-NEXT: ldr r0, [r0] ; CHECK-NEXT: vldr d18, [r1] +; CHECK-NEXT: orr.w r0, r12, r0, lsl #2 +; CHECK-NEXT: ldr r0, [r0] ; CHECK-NEXT: vmov d16, r2, r0 ; CHECK-NEXT: vtbl.8 d16, {d16, d17}, d18 ; CHECK-NEXT: vstr d16, [r1] diff --git a/llvm/test/CodeGen/ARM/pr122670-regression.ll b/llvm/test/CodeGen/ARM/pr122670-regression.ll index 1178eaae520d..32c7390dc44c 100644 --- a/llvm/test/CodeGen/ARM/pr122670-regression.ll +++ b/llvm/test/CodeGen/ARM/pr122670-regression.ll @@ -8,8 +8,8 @@ define i8 @baz(ptr %ptr, i32 %arg) { ; CHECK-LABEL: baz: ; CHECK: @ %bb.0: @ %bb -; CHECK-NEXT: add r1, r1, r1 -; CHECK-NEXT: and r1, r1, #2 +; CHECK-NEXT: mov r2, #2 +; CHECK-NEXT: and r1, r2, r1, lsl #1 ; CHECK-NEXT: ldr r0, [r0, r1, lsl #2] ; CHECK-NEXT: bx lr bb: diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll index 0f598f65dc64..9634605dddfd 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll @@ -168,7 +168,7 @@ define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; LA32-LABEL: extract_4xi64_idx: ; LA32: # %bb.0: ; LA32-NEXT: xvld $xr0, $a0, 0 -; LA32-NEXT: add.w $a0, $a2, $a2 +; LA32-NEXT: slli.w $a0, $a2, 1 ; LA32-NEXT: addi.w $a2, $a0, 1 ; LA32-NEXT: xvreplgr2vr.w $xr1, $a2 ; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1 diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll index 2f1db43e68fe..a13bdf9aeaf9 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll @@ -268,7 +268,7 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: pcalau12i $a5, %pc_hi20(.LCPI15_0) ; LA32-NEXT: xvld $xr0, $a5, %pc_lo12(.LCPI15_0) -; LA32-NEXT: add.w $a4, $a4, $a4 +; LA32-NEXT: slli.w $a4, $a4, 1 ; LA32-NEXT: xvld $xr1, $a0, 0 ; LA32-NEXT: xvreplgr2vr.w $xr2, $a4 ; LA32-NEXT: xvseq.w $xr2, $xr2, $xr0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll index b17a90e71e85..b25c2da9761b 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll @@ -167,7 +167,7 @@ define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { ; LA32-LABEL: extract_2xi64_idx: ; LA32: # %bb.0: ; LA32-NEXT: vld $vr0, $a0, 0 -; LA32-NEXT: add.w $a0, $a2, $a2 +; LA32-NEXT: slli.w $a0, $a2, 1 ; LA32-NEXT: addi.w $a2, $a0, 1 ; LA32-NEXT: vreplve.w $vr1, $vr0, $a2 ; LA32-NEXT: vreplve.w $vr0, $vr0, $a0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll index 496a1aed39fb..195016bb41b7 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll @@ -188,7 +188,7 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { ; LA32: # %bb.0: ; LA32-NEXT: pcalau12i $a5, %pc_hi20(.LCPI9_0) ; LA32-NEXT: vld $vr0, $a5, %pc_lo12(.LCPI9_0) -; LA32-NEXT: add.w $a4, $a4, $a4 +; LA32-NEXT: slli.w $a4, $a4, 1 ; LA32-NEXT: vld $vr1, $a0, 0 ; LA32-NEXT: vreplgr2vr.w $vr2, $a4 ; LA32-NEXT: vseq.w $vr2, $vr2, $vr0 diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll index c3889372b322..fea44035e42f 100644 --- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll +++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll @@ -1442,8 +1442,8 @@ define i64 @extract_sext_v2i64_vidx() nounwind { ; O32-BE-NEXT: addu $1, $2, $25 ; O32-BE-NEXT: lw $2, %got(i32)($1) ; O32-BE-NEXT: lw $2, 0($2) -; O32-BE-NEXT: addu $2, $2, $2 -; O32-BE-NEXT: addiu $3, $2, 1 +; O32-BE-NEXT: sll $2, $2, 1 +; O32-BE-NEXT: ori $3, $2, 1 ; O32-BE-NEXT: lw $1, %got(v2i64)($1) ; O32-BE-NEXT: ld.d $w0, 0($1) ; O32-BE-NEXT: addv.d $w0, $w0, $w0 @@ -1461,8 +1461,8 @@ define i64 @extract_sext_v2i64_vidx() nounwind { ; O32-LE-NEXT: addu $1, $2, $25 ; O32-LE-NEXT: lw $2, %got(i32)($1) ; O32-LE-NEXT: lw $2, 0($2) -; O32-LE-NEXT: addu $2, $2, $2 -; O32-LE-NEXT: addiu $3, $2, 1 +; O32-LE-NEXT: sll $2, $2, 1 +; O32-LE-NEXT: ori $3, $2, 1 ; O32-LE-NEXT: lw $1, %got(v2i64)($1) ; O32-LE-NEXT: ld.d $w0, 0($1) ; O32-LE-NEXT: addv.d $w0, $w0, $w0 @@ -1669,8 +1669,8 @@ define i64 @extract_zext_v2i64_vidx() nounwind { ; O32-BE-NEXT: addu $1, $2, $25 ; O32-BE-NEXT: lw $2, %got(i32)($1) ; O32-BE-NEXT: lw $2, 0($2) -; O32-BE-NEXT: addu $2, $2, $2 -; O32-BE-NEXT: addiu $3, $2, 1 +; O32-BE-NEXT: sll $2, $2, 1 +; O32-BE-NEXT: ori $3, $2, 1 ; O32-BE-NEXT: lw $1, %got(v2i64)($1) ; O32-BE-NEXT: ld.d $w0, 0($1) ; O32-BE-NEXT: addv.d $w0, $w0, $w0 @@ -1688,8 +1688,8 @@ define i64 @extract_zext_v2i64_vidx() nounwind { ; O32-LE-NEXT: addu $1, $2, $25 ; O32-LE-NEXT: lw $2, %got(i32)($1) ; O32-LE-NEXT: lw $2, 0($2) -; O32-LE-NEXT: addu $2, $2, $2 -; O32-LE-NEXT: addiu $3, $2, 1 +; O32-LE-NEXT: sll $2, $2, 1 +; O32-LE-NEXT: ori $3, $2, 1 ; O32-LE-NEXT: lw $1, %got(v2i64)($1) ; O32-LE-NEXT: ld.d $w0, 0($1) ; O32-LE-NEXT: addv.d $w0, $w0, $w0 diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll index 01df6f02bf6e..d339c92f270d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -165,14 +165,14 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) { ; ; CHECK-32-LABEL: testDoubleword: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: add 5, 6, 6 ; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: rlwinm 5, 6, 3, 28, 28 ; CHECK-32-NEXT: stxv 34, -32(1) -; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 28 -; CHECK-32-NEXT: stwx 3, 7, 6 -; CHECK-32-NEXT: addi 3, 5, 1 +; CHECK-32-NEXT: stwx 3, 7, 5 +; CHECK-32-NEXT: slwi 3, 6, 1 ; CHECK-32-NEXT: addi 5, 1, -16 ; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: addi 3, 3, 1 ; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 ; CHECK-32-NEXT: stxv 0, -16(1) ; CHECK-32-NEXT: stwx 4, 5, 3 @@ -187,8 +187,8 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) { ; ; CHECK-32-P10-LABEL: testDoubleword: ; CHECK-32-P10: # %bb.0: # %entry -; CHECK-32-P10-NEXT: add 5, 6, 6 -; CHECK-32-P10-NEXT: slwi 6, 5, 2 +; CHECK-32-P10-NEXT: slwi 5, 6, 1 +; CHECK-32-P10-NEXT: slwi 6, 6, 3 ; CHECK-32-P10-NEXT: vinswlx 2, 6, 3 ; CHECK-32-P10-NEXT: addi 3, 5, 1 ; CHECK-32-P10-NEXT: slwi 3, 3, 2 diff --git a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll index b05cb1843482..cda3b4dac6d3 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll @@ -990,14 +990,15 @@ entry: define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { ; CHECK-LABEL: getvelsl: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: add 5, 3, 3 +; CHECK-NEXT: rlwinm 5, 3, 3, 28, 28 +; CHECK-NEXT: slwi 3, 3, 1 ; CHECK-NEXT: addi 4, 1, -16 -; CHECK-NEXT: rlwinm 3, 5, 2, 28, 28 -; CHECK-NEXT: addi 5, 5, 1 +; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: stxvw4x 34, 0, 4 -; CHECK-NEXT: rlwinm 5, 5, 2, 28, 29 -; CHECK-NEXT: lwzx 3, 4, 3 -; CHECK-NEXT: lwzx 4, 4, 5 +; CHECK-NEXT: lwzx 5, 4, 5 +; CHECK-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-NEXT: lwzx 4, 4, 3 +; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: blr entry: %vecext = extractelement <2 x i64> %vsl, i32 %i @@ -1008,14 +1009,15 @@ entry: define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { ; CHECK-LABEL: getvelul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: add 5, 3, 3 +; CHECK-NEXT: rlwinm 5, 3, 3, 28, 28 +; CHECK-NEXT: slwi 3, 3, 1 ; CHECK-NEXT: addi 4, 1, -16 -; CHECK-NEXT: rlwinm 3, 5, 2, 28, 28 -; CHECK-NEXT: addi 5, 5, 1 +; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: stxvw4x 34, 0, 4 -; CHECK-NEXT: rlwinm 5, 5, 2, 28, 29 -; CHECK-NEXT: lwzx 3, 4, 3 -; CHECK-NEXT: lwzx 4, 4, 5 +; CHECK-NEXT: lwzx 5, 4, 5 +; CHECK-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-NEXT: lwzx 4, 4, 3 +; CHECK-NEXT: mr 3, 5 ; CHECK-NEXT: blr entry: %vecext = extractelement <2 x i64> %vul, i32 %i diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll index 835ce40d8d0a..3b43a2b5a6d7 100644 --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -241,12 +241,12 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) { ; ; AIX-P8-32-LABEL: testDoubleword: ; AIX-P8-32: # %bb.0: # %entry -; AIX-P8-32-NEXT: add r6, r6, r6 ; AIX-P8-32-NEXT: addi r5, r1, -16 -; AIX-P8-32-NEXT: rlwinm r7, r6, 2, 28, 28 +; AIX-P8-32-NEXT: rlwinm r7, r6, 3, 28, 28 ; AIX-P8-32-NEXT: stxvd2x v2, 0, r5 ; AIX-P8-32-NEXT: stwx r3, r5, r7 -; AIX-P8-32-NEXT: addi r3, r6, 1 +; AIX-P8-32-NEXT: slwi r3, r6, 1 +; AIX-P8-32-NEXT: addi r3, r3, 1 ; AIX-P8-32-NEXT: rlwinm r3, r3, 2, 28, 29 ; AIX-P8-32-NEXT: stwx r4, r5, r3 ; AIX-P8-32-NEXT: lxvd2x v2, 0, r5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 2515004952ae..9580d6caf08c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -621,7 +621,7 @@ define i64 @extractelt_v3i64_idx(<3 x i64> %a, i32 zeroext %idx) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v8 -; RV32-NEXT: add a0, a0, a0 +; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vx v10, v8, a0 ; RV32-NEXT: addi a1, a0, 1 diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 4751c9ce222e..a4af339b5ebb 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -1013,14 +1013,13 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind { ; X86AVX2-NEXT: movl 8(%ebp), %ecx ; X86AVX2-NEXT: movl 12(%ebp), %edx ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi -; X86AVX2-NEXT: andl $2, %esi -; X86AVX2-NEXT: movl %ecx, (%esp,%esi,4) +; X86AVX2-NEXT: leal 1(%eax,%eax), %esi +; X86AVX2-NEXT: andl $1, %eax +; X86AVX2-NEXT: movl %ecx, (%esp,%eax,8) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax -; X86AVX2-NEXT: andl $3, %eax -; X86AVX2-NEXT: movl %edx, 16(%esp,%eax,4) +; X86AVX2-NEXT: andl $3, %esi +; X86AVX2-NEXT: movl %edx, 16(%esp,%esi,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -1362,14 +1361,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl (%ecx), %edx ; X86AVX2-NEXT: movl 4(%ecx), %ecx ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi -; X86AVX2-NEXT: andl $2, %esi -; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) +; X86AVX2-NEXT: leal 1(%eax,%eax), %esi +; X86AVX2-NEXT: andl $1, %eax +; X86AVX2-NEXT: movl %edx, (%esp,%eax,8) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 ; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax -; X86AVX2-NEXT: andl $3, %eax -; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) +; X86AVX2-NEXT: andl $3, %esi +; X86AVX2-NEXT: movl %ecx, 16(%esp,%esi,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -1746,14 +1744,13 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { ; X86AVX2-NEXT: movl 8(%ebp), %ecx ; X86AVX2-NEXT: movl 12(%ebp), %edx ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi -; X86AVX2-NEXT: andl $6, %esi -; X86AVX2-NEXT: movl %ecx, (%esp,%esi,4) +; X86AVX2-NEXT: leal 1(%eax,%eax), %esi +; X86AVX2-NEXT: andl $3, %eax +; X86AVX2-NEXT: movl %ecx, (%esp,%eax,8) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax -; X86AVX2-NEXT: andl $7, %eax -; X86AVX2-NEXT: movl %edx, 32(%esp,%eax,4) +; X86AVX2-NEXT: andl $7, %esi +; X86AVX2-NEXT: movl %edx, 32(%esp,%esi,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi @@ -2128,14 +2125,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl (%ecx), %edx ; X86AVX2-NEXT: movl 4(%ecx), %ecx ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) -; X86AVX2-NEXT: leal (%eax,%eax), %esi -; X86AVX2-NEXT: andl $6, %esi -; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) +; X86AVX2-NEXT: leal 1(%eax,%eax), %esi +; X86AVX2-NEXT: andl $3, %eax +; X86AVX2-NEXT: movl %edx, (%esp,%eax,8) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 ; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86AVX2-NEXT: leal 1(%eax,%eax), %eax -; X86AVX2-NEXT: andl $7, %eax -; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) +; X86AVX2-NEXT: andl $7, %esi +; X86AVX2-NEXT: movl %ecx, 32(%esp,%esi,4) ; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 ; X86AVX2-NEXT: leal -4(%ebp), %esp ; X86AVX2-NEXT: popl %esi diff --git a/llvm/test/CodeGen/X86/vector-extend-inreg.ll b/llvm/test/CodeGen/X86/vector-extend-inreg.ll index 889ab6a0818e..7630255d3d08 100644 --- a/llvm/test/CodeGen/X86/vector-extend-inreg.ll +++ b/llvm/test/CodeGen/X86/vector-extend-inreg.ll @@ -11,7 +11,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X86-SSE-NEXT: movl %esp, %ebp ; X86-SSE-NEXT: andl $-16, %esp ; X86-SSE-NEXT: subl $272, %esp # imm = 0x110 -; X86-SSE-NEXT: movl 88(%ebp), %ecx +; X86-SSE-NEXT: movl 88(%ebp), %eax ; X86-SSE-NEXT: movdqa 72(%ebp), %xmm0 ; X86-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero ; X86-SSE-NEXT: xorps %xmm1, %xmm1 @@ -31,10 +31,9 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X86-SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movaps %xmm1, (%esp) ; X86-SSE-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal (%ecx,%ecx), %eax -; X86-SSE-NEXT: andl $31, %eax -; X86-SSE-NEXT: movl 128(%esp,%eax,4), %eax -; X86-SSE-NEXT: leal 1(%ecx,%ecx), %ecx +; X86-SSE-NEXT: leal 1(%eax,%eax), %ecx +; X86-SSE-NEXT: andl $15, %eax +; X86-SSE-NEXT: movl 128(%esp,%eax,8), %eax ; X86-SSE-NEXT: andl $31, %ecx ; X86-SSE-NEXT: movl (%esp,%ecx,4), %edx ; X86-SSE-NEXT: movl %ebp, %esp @@ -66,7 +65,7 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X86-AVX-NEXT: movl %esp, %ebp ; X86-AVX-NEXT: andl $-32, %esp ; X86-AVX-NEXT: subl $288, %esp # imm = 0x120 -; X86-AVX-NEXT: movl 40(%ebp), %ecx +; X86-AVX-NEXT: movl 40(%ebp), %eax ; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; X86-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) @@ -77,10 +76,9 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun ; X86-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp) ; X86-AVX-NEXT: vmovaps %ymm1, (%esp) ; X86-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) -; X86-AVX-NEXT: leal (%ecx,%ecx), %eax -; X86-AVX-NEXT: andl $31, %eax -; X86-AVX-NEXT: movl 128(%esp,%eax,4), %eax -; X86-AVX-NEXT: leal 1(%ecx,%ecx), %ecx +; X86-AVX-NEXT: leal 1(%eax,%eax), %ecx +; X86-AVX-NEXT: andl $15, %eax +; X86-AVX-NEXT: movl 128(%esp,%eax,8), %eax ; X86-AVX-NEXT: andl $31, %ecx ; X86-AVX-NEXT: movl (%esp,%ecx,4), %edx ; X86-AVX-NEXT: movl %ebp, %esp