Files
llvm-project/llvm/test/CodeGen/SystemZ/fp-half-vector-conv.ll
Jonas Paulsson c999e9a4fe [SystemZ] Support fp16 vector ABI and basic codegen. (#171066)
- Make v8f16 a legal type so that arguments can be passed in vector
registers. Handle fp16 vectors so that they have the same ABI as other
fp vectors.

- Set the preferred vector action for fp16 vectors to "split". This will
scalarize all operations, which is not always necessary (like with
memory operations), but it avoids the superfluous operations that result
after first widening and then scalarizing a narrow vector (like v4f16).

Fixes #168992
2026-01-26 13:42:25 -06:00

179 lines
6.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
;
; Test conversions between different-sized float elements.
; Test cases where both elements of a v2f64 are converted to f16s.
;
; Each element is expected to be truncated by its own __truncdfhf2 libcall,
; so one input/result has to be kept alive across a call in both runs:
;  - run without -mcpu: the second element is saved from %f2 into %f8, and
;    the first result is parked in %f9; both halves are then moved to a GPR
;    and stored with sth.
;  - z16 run: the whole argument (%v24) is spilled, the second element is
;    extracted with vrepg, and the halves are stored directly with vsteh.
; In both runs the two results are written to offsets 0 and 2 from %ptr.
define void @f1(<2 x double> %val, ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -176
; CHECK-NEXT: .cfi_def_cfa_offset 336
; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill
; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill
; CHECK-NEXT: .cfi_offset %f8, -168
; CHECK-NEXT: .cfi_offset %f9, -176
; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: ldr %f8, %f2
; CHECK-NEXT: brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT: ler %f9, %f0
; CHECK-NEXT: ldr %f0, %f8
; CHECK-NEXT: brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d
; CHECK-NEXT: lgdr %r0, %f0
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 2(%r13)
; CHECK-NEXT: lgdr %r0, %f9
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 0(%r13)
; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload
; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload
; CHECK-NEXT: lmg %r13, %r15, 280(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f1:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT: .cfi_offset %r13, -56
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -184
; VECTOR-NEXT: .cfi_def_cfa_offset 344
; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Spill
; VECTOR-NEXT: .cfi_offset %f8, -168
; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: vst %v24, 160(%r15), 3 # 16-byte Spill
; VECTOR-NEXT: vrepg %v0, %v24, 1
; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT: ldr %f8, %f0
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT: vsteh %v8, 2(%r13), 0
; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Reload
; VECTOR-NEXT: vsteh %v0, 0(%r13), 0
; VECTOR-NEXT: lmg %r13, %r15, 288(%r15)
; VECTOR-NEXT: br %r14
; Truncate the whole vector in one IR operation and store the <2 x half>
; result; the assertions above show it being scalarized into two libcalls.
%res = fptrunc <2 x double> %val to <2 x half>
store <2 x half> %res, ptr %ptr
ret void
}
; Test conversion of an f64 in a vector register to an f16.
;
; Only element 0 of the argument is used, so a single __truncdfhf2 libcall is
; expected and nothing needs to be preserved across it. The run without -mcpu
; emits no register move before the call; the z16 run first copies the
; argument from %v24 into %v0 with vlr.
define half @f2(<2 x double> %vec) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f2:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -160
; VECTOR-NEXT: .cfi_def_cfa_offset 320
; VECTOR-NEXT: vlr %v0, %v24
; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT: lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT: br %r14
; Extract the first double and truncate it as a scalar; the half result is
; the function's return value.
%scalar = extractelement <2 x double> %vec, i32 0
%ret = fptrunc double %scalar to half
ret half %ret
}
; Test cases where even elements of a v4f16 are converted to f64s.
;
; The shufflevector picks elements 0 and 2, and each one is expected to be
; extended by a separate __extendhfdf2 libcall:
;  - run without -mcpu: element 2 is saved from %f4 into %f8 before the first
;    call, and the two f64 results are left in %f0 and %f2.
;  - z16 run: element 2 is extracted from %v24 with vreph and spilled, the
;    first result is spilled as well, and the two f64 results are merged
;    into %v24 with vmrhg.
define <2 x double> @f3(<4 x half> %vec) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -176
; CHECK-NEXT: .cfi_def_cfa_offset 336
; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill
; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill
; CHECK-NEXT: .cfi_offset %f8, -168
; CHECK-NEXT: .cfi_offset %f9, -176
; CHECK-NEXT: ler %f8, %f4
; CHECK-NEXT: brasl %r14, __extendhfdf2@PLT
; CHECK-NEXT: ldr %f9, %f0
; CHECK-NEXT: ler %f0, %f8
; CHECK-NEXT: brasl %r14, __extendhfdf2@PLT
; CHECK-NEXT: ldr %f2, %f0
; CHECK-NEXT: ldr %f0, %f9
; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload
; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload
; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f3:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -192
; VECTOR-NEXT: .cfi_def_cfa_offset 352
; VECTOR-NEXT: vreph %v1, %v24, 2
; VECTOR-NEXT: vlr %v0, %v24
; VECTOR-NEXT: vst %v1, 176(%r15), 3 # 16-byte Spill
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0
; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0
; VECTOR-NEXT: vmrhg %v24, %v1, %v0
; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT: br %r14
; Select the even-indexed halves (mask <0, 2>) and extend both to double in a
; single vector fpext.
%shuffle = shufflevector <4 x half> %vec, <4 x half> %vec, <2 x i32> <i32 0, i32 2>
%res = fpext <2 x half> %shuffle to <2 x double>
ret <2 x double> %res
}
; Test conversion of an f16 in a vector register to an f32, constant element index.
;
; Only element 0 of the argument is used, so a single __extendhfsf2 libcall is
; expected. The run without -mcpu emits no register move before the call; the
; z16 run first copies the argument from %v24 into %v0 with vlr.
define float @f4(<4 x half> %vec) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f4:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -160
; VECTOR-NEXT: .cfi_def_cfa_offset 320
; VECTOR-NEXT: vlr %v0, %v24
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT: lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT: br %r14
; Extract the first half (constant index 0) and extend it as a scalar; the
; float result is the function's return value.
%scalar = extractelement <4 x half> %vec, i32 0
%ret = fpext half %scalar to float
ret float %ret
}