Files
llvm-project/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
Simon Pilgrim d9bbb902fe [LegalizeTypes][DAG] Use SHL(X,1) instead of ADD(X,X) for variable vector indices for extraction/insertion legalization (#188277)
Avoid ADD(X,X), as it doesn't correctly handle undef elements; using SHL(X,1) instead also helps avoid some FREEZE() fold headaches

Resurrects #86857
2026-04-22 18:05:16 +00:00

268 lines
8.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
; Constant-index extract from <32 x i8>. Volatile-loads the vector from %src,
; extracts element 1 and stores the i8 to %dst.
; Expected on both targets (shared prefix) - one xvld followed by a single
; xvstelm.b with lane immediate 1 storing straight to $a1.
define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_32xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 1
store i8 %e, ptr %dst
ret void
}
; Constant-index extract from <16 x i16>. Volatile-loads the vector from %src,
; extracts element 1 and stores the i16 to %dst.
; Expected on both targets (shared prefix) - one xvld followed by a single
; xvstelm.h with lane immediate 1.
define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_16xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 1
store i16 %e, ptr %dst
ret void
}
; Constant-index extract from <8 x i32>. Volatile-loads the vector from %src,
; extracts element 1 and stores the i32 to %dst.
; The two targets have separate expectations -
;   loongarch32 moves lane 1 to $a0 with xvpickve2gr.w and stores it with st.w;
;   loongarch64 stores lane 1 directly with xvstelm.w.
define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
; LA32-LABEL: extract_8xi32:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
; LA32-NEXT: st.w $a0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_8xi32:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 1
; LA64-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 1
store i32 %e, ptr %dst
ret void
}
; Constant-index extract from <4 x i64>. Volatile-loads the vector from %src,
; extracts element 1 and stores the i64 to %dst.
; The two targets have separate expectations -
;   loongarch32 reads 32-bit lanes 2 and 3 with two xvpickve2gr.w and stores
;   them with st.w at offsets 0 and 4 respectively;
;   loongarch64 stores 64-bit lane 1 directly with xvstelm.d.
define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
; LA32-LABEL: extract_4xi64:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 3
; LA32-NEXT: st.w $a2, $a1, 4
; LA32-NEXT: st.w $a0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_4xi64:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: xvstelm.d $xr0, $a1, 0, 1
; LA64-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 1
store i64 %e, ptr %dst
ret void
}
; Constant-index extract from <8 x float>. Volatile-loads the vector from %src,
; extracts element 7 (the last lane) and stores the float to %dst.
; Expected on both targets (shared prefix) - one xvld followed by a single
; xvstelm.w with lane immediate 7.
define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 7
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%e = extractelement <8 x float> %v, i32 7
store float %e, ptr %dst
ret void
}
; Constant-index extract from <4 x double>. Volatile-loads the vector from
; %src, extracts element 3 (the last lane) and stores the double to %dst.
; Expected on both targets (shared prefix) - one xvld followed by a single
; xvstelm.d with lane immediate 3.
define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 3
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 3
store double %e, ptr %dst
ret void
}
; Variable-index extract from <32 x i8>. Volatile-loads the vector from %src,
; extracts the element selected by the runtime i32 %idx and stores the i8 to
; %dst.
; Expected sequence on both targets - the index is moved into a vector
; register with movgr2fr.w, xvpermi.q produces a second vector operand, and
; xvshuf.b performs the selection; the result is stored from lane 0 with
; xvstelm.b.
; loongarch64 additionally applies bstrpick.d (bits 31..0) to the index
; register before the move.
define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-LABEL: extract_32xi8_idx:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: movgr2fr.w $fa1, $a2
; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
; LA32-NEXT: xvshuf.b $xr0, $xr2, $xr0, $xr1
; LA32-NEXT: xvstelm.b $xr0, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_32xi8_idx:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT: movgr2fr.w $fa2, $a0
; LA64-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 0
; LA64-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 %idx
store i8 %e, ptr %dst
ret void
}
; Variable-index extract from <16 x i16>. Volatile-loads the vector from %src,
; extracts the element selected by the runtime i32 %idx and stores the i16 to
; %dst.
; Expected sequence on both targets - the index is moved into a vector
; register with movgr2fr.w, xvpermi.q produces a second vector operand, and
; xvshuf.h writes its result into the register that held the index; lane 0 of
; that register is then stored with xvstelm.h.
; loongarch64 additionally applies bstrpick.d (bits 31..0) to the index
; register before the move.
define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-LABEL: extract_16xi16_idx:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: movgr2fr.w $fa1, $a2
; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
; LA32-NEXT: xvshuf.h $xr1, $xr2, $xr0
; LA32-NEXT: xvstelm.h $xr1, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_16xi16_idx:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT: movgr2fr.w $fa2, $a0
; LA64-NEXT: xvshuf.h $xr2, $xr1, $xr0
; LA64-NEXT: xvstelm.h $xr2, $a1, 0, 0
; LA64-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 %idx
store i16 %e, ptr %dst
ret void
}
; Variable-index extract from <8 x i32>. Volatile-loads the vector from %src,
; extracts the element selected by the runtime i32 %idx and stores the i32 to
; %dst.
; Expected sequence on both targets - the index is broadcast with
; xvreplgr2vr.w and used as the control operand of xvperm.w; the result is
; then taken from lane 0.
;   loongarch32 reads lane 0 with xvpickve2gr.w and stores it with st.w;
;   loongarch64 applies bstrpick.d (bits 31..0) to the index first and stores
;   lane 0 directly with xvstelm.w.
define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-LABEL: extract_8xi32_idx:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
; LA32-NEXT: st.w $a0, $a1, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_8xi32_idx:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT: xvreplgr2vr.w $xr1, $a0
; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1
; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 0
; LA64-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 %idx
store i32 %e, ptr %dst
ret void
}
; Variable-index extract from <4 x i64>. Volatile-loads the vector from %src,
; extracts the element selected by the runtime i32 %idx and stores the i64 to
; %dst.
; The two targets have separate expectations -
;   loongarch32 handles the i64 as two 32-bit lanes. The index is doubled with
;   a shift (slli.w by 1) to get the low-half lane and incremented by 1
;   (addi.w) for the high-half lane. Each half is selected with
;   xvreplgr2vr.w + xvperm.w, read from lane 0 with xvpickve2gr.w, and stored
;   with st.w at offsets 0 (low) and 4 (high).
;   loongarch64 uses xvpermi.q, bstrpick.d (bits 31..0) on the index,
;   movgr2fr.w and xvshuf.d, then stores lane 0 with xvstelm.d.
define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-LABEL: extract_4xi64_idx:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: slli.w $a0, $a2, 1
; LA32-NEXT: addi.w $a2, $a0, 1
; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
; LA32-NEXT: xvperm.w $xr1, $xr0, $xr1
; LA32-NEXT: xvpickve2gr.w $a2, $xr1, 0
; LA32-NEXT: xvreplgr2vr.w $xr1, $a0
; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
; LA32-NEXT: st.w $a0, $a1, 0
; LA32-NEXT: st.w $a2, $a1, 4
; LA32-NEXT: ret
;
; LA64-LABEL: extract_4xi64_idx:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT: movgr2fr.w $fa2, $a0
; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
; LA64-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 %idx
store i64 %e, ptr %dst
ret void
}
; Variable-index extract from <8 x float>. Volatile-loads the vector from
; %src, extracts the element selected by the runtime i32 %idx and stores the
; float to %dst.
; Expected sequence on both targets - the index is broadcast with
; xvreplgr2vr.w and used as the control operand of xvperm.w; lane 0 of the
; result is stored directly with xvstelm.w.
; loongarch64 additionally applies bstrpick.d (bits 31..0) to the index
; register before the broadcast.
define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-LABEL: extract_8xfloat_idx:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: xvreplgr2vr.w $xr1, $a2
; LA32-NEXT: xvperm.w $xr0, $xr0, $xr1
; LA32-NEXT: xvstelm.w $xr0, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_8xfloat_idx:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT: xvreplgr2vr.w $xr1, $a0
; LA64-NEXT: xvperm.w $xr0, $xr0, $xr1
; LA64-NEXT: xvstelm.w $xr0, $a1, 0, 0
; LA64-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%e = extractelement <8 x float> %v, i32 %idx
store float %e, ptr %dst
ret void
}
; Variable-index extract from <4 x double>. Volatile-loads the vector from
; %src, extracts the element selected by the runtime i32 %idx and stores the
; double to %dst.
; Expected sequence on both targets - the index is moved into a vector
; register with movgr2fr.w, xvpermi.q produces a second vector operand, and
; xvshuf.d writes its result into the register that held the index; lane 0 of
; that register is then stored with xvstelm.d.
; loongarch64 additionally applies bstrpick.d (bits 31..0) to the index
; register before the move.
define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-LABEL: extract_4xdouble_idx:
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a0, 0
; LA32-NEXT: movgr2fr.w $fa1, $a2
; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
; LA32-NEXT: xvshuf.d $xr1, $xr2, $xr0
; LA32-NEXT: xvstelm.d $xr1, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_4xdouble_idx:
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a0, 0
; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT: movgr2fr.w $fa2, $a0
; LA64-NEXT: xvshuf.d $xr2, $xr1, $xr0
; LA64-NEXT: xvstelm.d $xr2, $a1, 0, 0
; LA64-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 %idx
store double %e, ptr %dst
ret void
}
; Extract-and-store into a stack slot. Allocates a [32 x [8 x i32]] object on
; the stack, extracts element 1 of the <8 x i32> argument %a and stores it 508
; bytes into that object.
; Expected on both targets - the stack pointer is adjusted by 1040 and the
; store lands at $sp + 524.
;   loongarch32 reads lane 1 with xvpickve2gr.w and stores with st.w using the
;   524 offset directly;
;   loongarch64 first materialises $sp + 524 in $a0 with addi.d, then uses
;   xvstelm.w with a zero offset and lane immediate 1.
; NOTE(review) - judging by the function name this covers frame-index
; elimination for xvstelm; presumably 524 was chosen because it does not fit
; the instruction's offset field - confirm against the backend.
define void @eliminate_frame_index(<8 x i32> %a) nounwind {
; LA32-LABEL: eliminate_frame_index:
; LA32: # %bb.0:
; LA32-NEXT: addi.w $sp, $sp, -1040
; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
; LA32-NEXT: st.w $a0, $sp, 524
; LA32-NEXT: addi.w $sp, $sp, 1040
; LA32-NEXT: ret
;
; LA64-LABEL: eliminate_frame_index:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -1040
; LA64-NEXT: addi.d $a0, $sp, 524
; LA64-NEXT: xvstelm.w $xr0, $a0, 0, 1
; LA64-NEXT: addi.d $sp, $sp, 1040
; LA64-NEXT: ret
%1 = alloca [32 x [8 x i32]]
%2 = getelementptr i8, ptr %1, i64 508
%b = extractelement <8 x i32> %a, i64 1
store i32 %b, ptr %2
ret void
}