; Avoid ADD(X,X) as it doesn't correctly handle undef elements and helps avoid
; some FREEZE() fold headaches. Resurrects #86857.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
; Insert i8 %in at constant index 1 (low 128-bit half) of a loaded <32 x i8>.
define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 17
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <32 x i8>, ptr %src
  %v_new = insertelement <32 x i8> %v, i8 %in, i32 1
  store <32 x i8> %v_new, ptr %dst
  ret void
}
; Insert i8 %in at constant index 16 (upper 128-bit half) of a loaded <32 x i8>.
define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8_upper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvreplgr2vr.b $xr1, $a2
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 48
; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 0
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <32 x i8>, ptr %src
  %v_new = insertelement <32 x i8> %v, i8 %in, i32 16
  store <32 x i8> %v_new, ptr %dst
  ret void
}
; Insert i8 %in at index 1 of a poison <32 x i8> (low half; only a 128-bit
; vinsgr2vr is needed).
define void @insert_32xi8_undef(ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
  %v = insertelement <32 x i8> poison, i8 %in, i32 1
  store <32 x i8> %v, ptr %dst
  ret void
}
; Insert i8 %in at index 22 (upper half) of a poison <32 x i8>.
define void @insert_32xi8_undef_upper(ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8_undef_upper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 6
; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
  %v = insertelement <32 x i8> poison, i8 %in, i32 22
  store <32 x i8> %v, ptr %dst
  ret void
}
; Insert i16 %in at constant index 1 (low half) of a loaded <16 x i16>.
define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 18
; CHECK-NEXT:    xvextrins.h $xr0, $xr1, 17
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <16 x i16>, ptr %src
  %v_new = insertelement <16 x i16> %v, i16 %in, i32 1
  store <16 x i16> %v_new, ptr %dst
  ret void
}
; Insert i16 %in at constant index 8 (upper half) of a loaded <16 x i16>.
define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16_upper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvreplgr2vr.h $xr1, $a2
; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 48
; CHECK-NEXT:    xvextrins.h $xr0, $xr1, 0
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <16 x i16>, ptr %src
  %v_new = insertelement <16 x i16> %v, i16 %in, i32 8
  store <16 x i16> %v_new, ptr %dst
  ret void
}
; Insert i16 %in at index 1 of a poison <16 x i16> (low half).
define void @insert_16xi16_undef(ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16_undef:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 1
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
  %v = insertelement <16 x i16> poison, i16 %in, i32 1
  store <16 x i16> %v, ptr %dst
  ret void
}
; Insert i16 %in at index 10 (upper half) of a poison <16 x i16>.
define void @insert_16xi16_undef_upper(ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16_undef_upper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 2
; CHECK-NEXT:    xvpermi.q $xr0, $xr0, 2
; CHECK-NEXT:    xvst $xr0, $a0, 0
; CHECK-NEXT:    ret
  %v = insertelement <16 x i16> poison, i16 %in, i32 10
  store <16 x i16> %v, ptr %dst
  ret void
}
; Insert i32 %in at constant index 1 of a loaded <8 x i32>; a single
; xvinsgr2vr.w suffices on both targets.
define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind {
; CHECK-LABEL: insert_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr0, $a0, 0
; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a2, 1
; CHECK-NEXT:    xvst $xr0, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <8 x i32>, ptr %src
  %v_new = insertelement <8 x i32> %v, i32 %in, i32 1
  store <8 x i32> %v_new, ptr %dst
  ret void
}
; Insert i64 %in at constant index 1 of a loaded <4 x i64>. LA32 splits the
; 64-bit value into two 32-bit inserts; LA64 uses one xvinsgr2vr.d.
define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind {
; LA32-LABEL: insert_4xi64:
; LA32:       # %bb.0:
; LA32-NEXT:    xvld $xr0, $a0, 0
; LA32-NEXT:    xvinsgr2vr.w $xr0, $a2, 2
; LA32-NEXT:    xvinsgr2vr.w $xr0, $a3, 3
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_4xi64:
; LA64:       # %bb.0:
; LA64-NEXT:    xvld $xr0, $a0, 0
; LA64-NEXT:    xvinsgr2vr.d $xr0, $a2, 1
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <4 x i64>, ptr %src
  %v_new = insertelement <4 x i64> %v, i64 %in, i32 1
  store <4 x i64> %v_new, ptr %dst
  ret void
}
; Insert float %in at constant index 1 of a loaded <8 x float> via xvinsve0.w.
define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind {
; CHECK-LABEL: insert_8xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr1, $a0, 0
; CHECK-NEXT:    # kill: def $f0 killed $f0 def $xr0
; CHECK-NEXT:    xvinsve0.w $xr1, $xr0, 1
; CHECK-NEXT:    xvst $xr1, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <8 x float>, ptr %src
  %v_new = insertelement <8 x float> %v, float %in, i32 1
  store <8 x float> %v_new, ptr %dst
  ret void
}
; Insert double %in at constant index 1 of a loaded <4 x double> via xvinsve0.d.
define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
; CHECK-LABEL: insert_4xdouble:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvld $xr1, $a0, 0
; CHECK-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
; CHECK-NEXT:    xvinsve0.d $xr1, $xr0, 1
; CHECK-NEXT:    xvst $xr1, $a1, 0
; CHECK-NEXT:    ret
  %v = load volatile <4 x double>, ptr %src
  %v_new = insertelement <4 x double> %v, double %in, i32 1
  store <4 x double> %v_new, ptr %dst
  ret void
}
; Insert i8 %in at a variable index: lowered as compare-against-iota + bitsel.
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; LA32-LABEL: insert_32xi8_idx:
; LA32:       # %bb.0:
; LA32-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI12_0)
; LA32-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI12_0)
; LA32-NEXT:    xvld $xr1, $a0, 0
; LA32-NEXT:    xvreplgr2vr.b $xr2, $a3
; LA32-NEXT:    xvseq.b $xr0, $xr2, $xr0
; LA32-NEXT:    xvreplgr2vr.b $xr2, $a2
; LA32-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_32xi8_idx:
; LA64:       # %bb.0:
; LA64-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI12_0)
; LA64-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI12_0)
; LA64-NEXT:    xvld $xr1, $a0, 0
; LA64-NEXT:    bstrpick.d $a0, $a3, 31, 0
; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
; LA64-NEXT:    xvseq.b $xr0, $xr2, $xr0
; LA64-NEXT:    xvreplgr2vr.b $xr2, $a2
; LA64-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <32 x i8>, ptr %src
  %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
  store <32 x i8> %v_new, ptr %dst
  ret void
}
; Insert i16 %in at a variable index: compare-against-iota + bitsel.
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; LA32-LABEL: insert_16xi16_idx:
; LA32:       # %bb.0:
; LA32-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI13_0)
; LA32-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI13_0)
; LA32-NEXT:    xvld $xr1, $a0, 0
; LA32-NEXT:    xvreplgr2vr.h $xr2, $a3
; LA32-NEXT:    xvseq.h $xr0, $xr2, $xr0
; LA32-NEXT:    xvreplgr2vr.h $xr2, $a2
; LA32-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_16xi16_idx:
; LA64:       # %bb.0:
; LA64-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI13_0)
; LA64-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI13_0)
; LA64-NEXT:    xvld $xr1, $a0, 0
; LA64-NEXT:    bstrpick.d $a0, $a3, 31, 0
; LA64-NEXT:    xvreplgr2vr.h $xr2, $a0
; LA64-NEXT:    xvseq.h $xr0, $xr2, $xr0
; LA64-NEXT:    xvreplgr2vr.h $xr2, $a2
; LA64-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <16 x i16>, ptr %src
  %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
  store <16 x i16> %v_new, ptr %dst
  ret void
}
; Insert i32 %in at a variable index: compare-against-iota + bitsel.
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; LA32-LABEL: insert_8xi32_idx:
; LA32:       # %bb.0:
; LA32-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI14_0)
; LA32-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI14_0)
; LA32-NEXT:    xvld $xr1, $a0, 0
; LA32-NEXT:    xvreplgr2vr.w $xr2, $a3
; LA32-NEXT:    xvseq.w $xr0, $xr2, $xr0
; LA32-NEXT:    xvreplgr2vr.w $xr2, $a2
; LA32-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_8xi32_idx:
; LA64:       # %bb.0:
; LA64-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI14_0)
; LA64-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI14_0)
; LA64-NEXT:    xvld $xr1, $a0, 0
; LA64-NEXT:    bstrpick.d $a0, $a3, 31, 0
; LA64-NEXT:    xvreplgr2vr.w $xr2, $a0
; LA64-NEXT:    xvseq.w $xr0, $xr2, $xr0
; LA64-NEXT:    xvreplgr2vr.w $xr2, $a2
; LA64-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <8 x i32>, ptr %src
  %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
  store <8 x i32> %v_new, ptr %dst
  ret void
}
; Insert i64 %in at a variable index. LA32 performs two 32-bit selects (lo
; word at 2*idx, hi word at 2*idx+1); LA64 does one 64-bit compare + bitsel.
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; LA32-LABEL: insert_4xi64_idx:
; LA32:       # %bb.0:
; LA32-NEXT:    pcalau12i $a5, %pc_hi20(.LCPI15_0)
; LA32-NEXT:    xvld $xr0, $a5, %pc_lo12(.LCPI15_0)
; LA32-NEXT:    slli.w $a4, $a4, 1
; LA32-NEXT:    xvld $xr1, $a0, 0
; LA32-NEXT:    xvreplgr2vr.w $xr2, $a4
; LA32-NEXT:    xvseq.w $xr2, $xr2, $xr0
; LA32-NEXT:    xvreplgr2vr.w $xr3, $a2
; LA32-NEXT:    xvbitsel.v $xr1, $xr1, $xr3, $xr2
; LA32-NEXT:    addi.w $a0, $a4, 1
; LA32-NEXT:    xvreplgr2vr.w $xr2, $a0
; LA32-NEXT:    xvseq.w $xr0, $xr2, $xr0
; LA32-NEXT:    xvreplgr2vr.w $xr2, $a3
; LA32-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_4xi64_idx:
; LA64:       # %bb.0:
; LA64-NEXT:    pcalau12i $a4, %pc_hi20(.LCPI15_0)
; LA64-NEXT:    xvld $xr0, $a4, %pc_lo12(.LCPI15_0)
; LA64-NEXT:    xvld $xr1, $a0, 0
; LA64-NEXT:    bstrpick.d $a0, $a3, 31, 0
; LA64-NEXT:    xvreplgr2vr.d $xr2, $a0
; LA64-NEXT:    xvseq.d $xr0, $xr2, $xr0
; LA64-NEXT:    xvreplgr2vr.d $xr2, $a2
; LA64-NEXT:    xvbitsel.v $xr0, $xr1, $xr2, $xr0
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <4 x i64>, ptr %src
  %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
  store <4 x i64> %v_new, ptr %dst
  ret void
}
; Insert float %in at a variable index: splat the scalar with xvreplve0.w,
; then compare-against-iota + bitsel.
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
; LA32-LABEL: insert_8xfloat_idx:
; LA32:       # %bb.0:
; LA32-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI16_0)
; LA32-NEXT:    xvld $xr1, $a3, %pc_lo12(.LCPI16_0)
; LA32-NEXT:    # kill: def $f0 killed $f0 def $xr0
; LA32-NEXT:    xvld $xr2, $a0, 0
; LA32-NEXT:    xvreplgr2vr.w $xr3, $a2
; LA32-NEXT:    xvseq.w $xr1, $xr3, $xr1
; LA32-NEXT:    xvreplve0.w $xr0, $xr0
; LA32-NEXT:    xvbitsel.v $xr0, $xr2, $xr0, $xr1
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_8xfloat_idx:
; LA64:       # %bb.0:
; LA64-NEXT:    # kill: def $f0 killed $f0 def $xr0
; LA64-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI16_0)
; LA64-NEXT:    xvld $xr1, $a3, %pc_lo12(.LCPI16_0)
; LA64-NEXT:    xvld $xr2, $a0, 0
; LA64-NEXT:    bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT:    xvreplgr2vr.w $xr3, $a0
; LA64-NEXT:    xvseq.w $xr1, $xr3, $xr1
; LA64-NEXT:    xvreplve0.w $xr0, $xr0
; LA64-NEXT:    xvbitsel.v $xr0, $xr2, $xr0, $xr1
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <8 x float>, ptr %src
  %v_new = insertelement <8 x float> %v, float %in, i32 %idx
  store <8 x float> %v_new, ptr %dst
  ret void
}
; Insert double %in at a variable index. LA32 builds the splat-index vector
; word by word before the 64-bit compare; LA64 uses xvreplgr2vr.d directly.
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
; LA32-LABEL: insert_4xdouble_idx:
; LA32:       # %bb.0:
; LA32-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
; LA32-NEXT:    xvld $xr1, $a0, 0
; LA32-NEXT:    xvrepli.b $xr2, 0
; LA32-NEXT:    xvinsgr2vr.w $xr2, $a2, 0
; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI17_0)
; LA32-NEXT:    xvld $xr3, $a0, %pc_lo12(.LCPI17_0)
; LA32-NEXT:    xvinsgr2vr.w $xr2, $a2, 2
; LA32-NEXT:    xvinsgr2vr.w $xr2, $a2, 4
; LA32-NEXT:    xvinsgr2vr.w $xr2, $a2, 6
; LA32-NEXT:    xvseq.d $xr2, $xr2, $xr3
; LA32-NEXT:    xvreplve0.d $xr0, $xr0
; LA32-NEXT:    xvbitsel.v $xr0, $xr1, $xr0, $xr2
; LA32-NEXT:    xvst $xr0, $a1, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: insert_4xdouble_idx:
; LA64:       # %bb.0:
; LA64-NEXT:    # kill: def $f0_64 killed $f0_64 def $xr0
; LA64-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI17_0)
; LA64-NEXT:    xvld $xr1, $a3, %pc_lo12(.LCPI17_0)
; LA64-NEXT:    xvld $xr2, $a0, 0
; LA64-NEXT:    bstrpick.d $a0, $a2, 31, 0
; LA64-NEXT:    xvreplgr2vr.d $xr3, $a0
; LA64-NEXT:    xvseq.d $xr1, $xr3, $xr1
; LA64-NEXT:    xvreplve0.d $xr0, $xr0
; LA64-NEXT:    xvbitsel.v $xr0, $xr2, $xr0, $xr1
; LA64-NEXT:    xvst $xr0, $a1, 0
; LA64-NEXT:    ret
  %v = load volatile <4 x double>, ptr %src
  %v_new = insertelement <4 x double> %v, double %in, i32 %idx
  store <4 x double> %v_new, ptr %dst
  ret void
}