- Make v8f16 a legal type so that arguments can be passed in vector registers. Handle fp16 vectors so that they have the same ABI as other fp vectors.
- Set the preferred vector action for fp16 vectors to "split". This will scalarize all operations, which is not always necessary (like with memory operations), but it avoids the superfluous operations that result after first widening and then scalarizing a narrow vector (like v4f16).

Fixes #168992
179 lines
6.6 KiB
LLVM
179 lines
6.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
;
; Test conversions between different-sized float elements.

; Test cases where both elements of a v2f64 are converted to f16s.
;
; @f1 truncates its <2 x double> argument to <2 x half> and stores the result
; through %ptr. In both RUN configurations the expected code converts one
; element at a time with two calls to __truncdfhf2:
;   - Default-CPU run (CHECK prefix): the second element is taken from %f2;
;     each result is moved to a GPR (lgdr + srlg by 48) and stored with sth
;     at offsets 0 and 2 from the pointer.
;   - z16 run (VECTOR prefix): the argument arrives in %v24, element 1 is
;     extracted with vrepg, and each result is stored with vsteh.
define void @f1(<2 x double> %val, ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
; CHECK-NEXT: .cfi_offset %r13, -56
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -176
; CHECK-NEXT: .cfi_def_cfa_offset 336
; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill
; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill
; CHECK-NEXT: .cfi_offset %f8, -168
; CHECK-NEXT: .cfi_offset %f9, -176
; CHECK-NEXT: lgr %r13, %r2
; CHECK-NEXT: ldr %f8, %f2
; CHECK-NEXT: brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT: ler %f9, %f0
; CHECK-NEXT: ldr %f0, %f8
; CHECK-NEXT: brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d
; CHECK-NEXT: lgdr %r0, %f0
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 2(%r13)
; CHECK-NEXT: lgdr %r0, %f9
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 0(%r13)
; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload
; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload
; CHECK-NEXT: lmg %r13, %r15, 280(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f1:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT: .cfi_offset %r13, -56
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -184
; VECTOR-NEXT: .cfi_def_cfa_offset 344
; VECTOR-NEXT: std %f8, 176(%r15) # 8-byte Spill
; VECTOR-NEXT: .cfi_offset %f8, -168
; VECTOR-NEXT: lgr %r13, %r2
; VECTOR-NEXT: vst %v24, 160(%r15), 3 # 16-byte Spill
; VECTOR-NEXT: vrepg %v0, %v24, 1
; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT: ldr %f8, %f0
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT: vsteh %v8, 2(%r13), 0
; VECTOR-NEXT: ld %f8, 176(%r15) # 8-byte Reload
; VECTOR-NEXT: vsteh %v0, 0(%r13), 0
; VECTOR-NEXT: lmg %r13, %r15, 288(%r15)
; VECTOR-NEXT: br %r14
  %res = fptrunc <2 x double> %val to <2 x half>
  store <2 x half> %res, ptr %ptr
  ret void
}

; Test conversion of an f64 in a vector register to an f16.
;
; @f2 extracts element 0 of its <2 x double> argument and truncates it to
; half. Both RUN configurations expect a single call to __truncdfhf2:
;   - Default-CPU run (CHECK prefix): no move is emitted before the call.
;   - z16 run (VECTOR prefix): the argument is first copied from %v24 to %v0
;     with vlr.
define half @f2(<2 x double> %vec) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: brasl %r14, __truncdfhf2@PLT
; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f2:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -160
; VECTOR-NEXT: .cfi_def_cfa_offset 320
; VECTOR-NEXT: vlr %v0, %v24
; VECTOR-NEXT: # kill: def $f0d killed $f0d killed $v0
; VECTOR-NEXT: brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT: lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT: br %r14
  %scalar = extractelement <2 x double> %vec, i32 0
  %ret = fptrunc double %scalar to half
  ret half %ret
}

; Test cases where even elements of a v4f16 are converted to f64s.
;
; @f3 selects elements 0 and 2 of its <4 x half> argument with a
; shufflevector and extends them to <2 x double>. In both RUN configurations
; each selected element is extended by its own call to __extendhfdf2:
;   - Default-CPU run (CHECK prefix): element 2 is taken from %f4, and the
;     two results are returned in %f0 and %f2.
;   - z16 run (VECTOR prefix): element 2 is extracted from %v24 with vreph,
;     and the two results are merged into %v24 with vmrhg.
define <2 x double> @f3(<4 x half> %vec) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -176
; CHECK-NEXT: .cfi_def_cfa_offset 336
; CHECK-NEXT: std %f8, 168(%r15) # 8-byte Spill
; CHECK-NEXT: std %f9, 160(%r15) # 8-byte Spill
; CHECK-NEXT: .cfi_offset %f8, -168
; CHECK-NEXT: .cfi_offset %f9, -176
; CHECK-NEXT: ler %f8, %f4
; CHECK-NEXT: brasl %r14, __extendhfdf2@PLT
; CHECK-NEXT: ldr %f9, %f0
; CHECK-NEXT: ler %f0, %f8
; CHECK-NEXT: brasl %r14, __extendhfdf2@PLT
; CHECK-NEXT: ldr %f2, %f0
; CHECK-NEXT: ldr %f0, %f9
; CHECK-NEXT: ld %f8, 168(%r15) # 8-byte Reload
; CHECK-NEXT: ld %f9, 160(%r15) # 8-byte Reload
; CHECK-NEXT: lmg %r14, %r15, 288(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f3:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -192
; VECTOR-NEXT: .cfi_def_cfa_offset 352
; VECTOR-NEXT: vreph %v1, %v24, 2
; VECTOR-NEXT: vlr %v0, %v24
; VECTOR-NEXT: vst %v1, 176(%r15), 3 # 16-byte Spill
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0
; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT: brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT: # kill: def $f0d killed $f0d def $v0
; VECTOR-NEXT: vmrhg %v24, %v1, %v0
; VECTOR-NEXT: lmg %r14, %r15, 304(%r15)
; VECTOR-NEXT: br %r14
  %shuffle = shufflevector <4 x half> %vec, <4 x half> %vec, <2 x i32> <i32 0, i32 2>
  %res = fpext <2 x half> %shuffle to <2 x double>
  ret <2 x double> %res
}

; Test conversion of an f16 in a vector register to an f32, constant element index.
;
; @f4 extracts element 0 of its <4 x half> argument and extends it to float.
; Both RUN configurations expect a single call to __extendhfsf2:
;   - Default-CPU run (CHECK prefix): no move is emitted before the call.
;   - z16 run (VECTOR prefix): the argument is first copied from %v24 to %v0
;     with vlr.
define float @f4(<4 x half> %vec) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: aghi %r15, -160
; CHECK-NEXT: .cfi_def_cfa_offset 320
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
; CHECK-NEXT: br %r14
;
; VECTOR-LABEL: f4:
; VECTOR: # %bb.0:
; VECTOR-NEXT: stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT: .cfi_offset %r14, -48
; VECTOR-NEXT: .cfi_offset %r15, -40
; VECTOR-NEXT: aghi %r15, -160
; VECTOR-NEXT: .cfi_def_cfa_offset 320
; VECTOR-NEXT: vlr %v0, %v24
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT: lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT: br %r14
  %scalar = extractelement <4 x half> %vec, i32 0
  %ret = fpext half %scalar to float
  ret float %ret
}