This is a relatively simple strategy, as it omits any heuristics for liveness and register pressure reduction. This works well because the SystemZ ISel scheduler uses Sched::RegPressure, which gives a good input order to begin with. It tries harder than GenericScheduler to bias phys regs, as it also considers other instructions, such as immediate loads directly into phys-regs produced by the register coalescer. This can hopefully be refactored into MachineScheduler.cpp. It has a latency heuristic that is slightly different from the one in GenericScheduler: it is activated for a specific type of region that has many "data sequences" consisting of SUs that are connected only by a single data edge and are next to each other in the input order. This applies to only 3% of all the scheduling regions, but when activated it is applied to all the candidates (not just once per cycle). At the same time it is a bit more careful, checking not only the SU Height against the scheduled latency but also its Depth against the remaining latency. It reuses the GenericScheduler handling of weak edges to help copy coalescing. It also helps with compare-zero elimination, as it tries to put a CC-defining instruction that produces the compare source value above the compare, before any other instruction that clobbers CC or the value. This work was started after observing heavy spilling in Cactus, which was actually *caused* by GenericScheduler - disabling it (no pre-RA scheduling) remedied it and gave a 7% improvement in performance on that benchmark. Many different versions have been tried, and they have evolved into this initial, simplistic MachineSchedStrategy that does relatively little and yet achieves double-digit improvements on Cactus and Imagick compared to GenericSched (which is OTOH 3% better on Blender). There will hopefully be more improvements added later on, as there seems to be potential for it.
It would be very interesting to have other OOO targets try this as well, and perhaps to make it available in MachineScheduler.cpp. (A first attempt at improving the pre-RA scheduling was made with #90181, which, however, did not materialize into anything actually useful.)
613 lines
21 KiB
LLVM
613 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefix=NOVEC
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \
; RUN:   | FileCheck %s --check-prefix=VECTOR
;
; Tests for 16-bit floating point (half).

; Incoming half arguments added together and returned.
; The expected output for both run configurations extends each operand with
; __extendhfsf2, adds in single precision and truncates with __truncsfhf2.
define half @fun0(half %Op0, half %Op1) {
; NOVEC-LABEL: fun0:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -176
; NOVEC-NEXT:    .cfi_def_cfa_offset 336
; NOVEC-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; NOVEC-NEXT:    .cfi_offset %f8, -168
; NOVEC-NEXT:    .cfi_offset %f9, -176
; NOVEC-NEXT:    ler %f8, %f0
; NOVEC-NEXT:    ler %f0, %f2
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    ler %f9, %f0
; NOVEC-NEXT:    ler %f0, %f8
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    aebr %f0, %f9
; NOVEC-NEXT:    brasl %r14, __truncsfhf2@PLT
; NOVEC-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; NOVEC-NEXT:    lmg %r14, %r15, 288(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun0:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -176
; VECTOR-NEXT:    .cfi_def_cfa_offset 336
; VECTOR-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; VECTOR-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; VECTOR-NEXT:    .cfi_offset %f8, -168
; VECTOR-NEXT:    .cfi_offset %f9, -176
; VECTOR-NEXT:    ldr %f8, %f0
; VECTOR-NEXT:    ldr %f0, %f2
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    ldr %f9, %f0
; VECTOR-NEXT:    ldr %f0, %f8
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    aebr %f0, %f9
; VECTOR-NEXT:    brasl %r14, __truncsfhf2@PLT
; VECTOR-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; VECTOR-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %Res = fadd half %Op0, %Op1
  ret half %Res
}

; Extend two incoming half arguments to double, add them and truncate the sum
; back to half.
define half @fun1(half %Op0, half %Op1) {
; NOVEC-LABEL: fun1:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -176
; NOVEC-NEXT:    .cfi_def_cfa_offset 336
; NOVEC-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; NOVEC-NEXT:    .cfi_offset %f8, -168
; NOVEC-NEXT:    .cfi_offset %f9, -176
; NOVEC-NEXT:    ler %f8, %f2
; NOVEC-NEXT:    brasl %r14, __extendhfdf2@PLT
; NOVEC-NEXT:    ldr %f9, %f0
; NOVEC-NEXT:    ler %f0, %f8
; NOVEC-NEXT:    brasl %r14, __extendhfdf2@PLT
; NOVEC-NEXT:    adbr %f0, %f9
; NOVEC-NEXT:    brasl %r14, __truncdfhf2@PLT
; NOVEC-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; NOVEC-NEXT:    lmg %r14, %r15, 288(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun1:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -176
; VECTOR-NEXT:    .cfi_def_cfa_offset 336
; VECTOR-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; VECTOR-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; VECTOR-NEXT:    .cfi_offset %f8, -168
; VECTOR-NEXT:    .cfi_offset %f9, -176
; VECTOR-NEXT:    ldr %f8, %f2
; VECTOR-NEXT:    brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT:    ldr %f9, %f0
; VECTOR-NEXT:    ldr %f0, %f8
; VECTOR-NEXT:    brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT:    wfadb %f0, %f9, %f0
; VECTOR-NEXT:    brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; VECTOR-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %E0 = fpext half %Op0 to double
  %E1 = fpext half %Op1 to double
  %Add = fadd double %E0, %E1
  %Res = fptrunc double %Add to half
  ret half %Res
}

; Extend two incoming half arguments to fp128, add them and truncate the sum
; back to half. The expected output passes the fp128 values to and from the
; library calls through stack slots addressed via %r2.
define half @fun2(half %Op0, half %Op1) {
; NOVEC-LABEL: fun2:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -232
; NOVEC-NEXT:    .cfi_def_cfa_offset 392
; NOVEC-NEXT:    std %f8, 224(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f9, 216(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f11, 208(%r15) # 8-byte Spill
; NOVEC-NEXT:    .cfi_offset %f8, -168
; NOVEC-NEXT:    .cfi_offset %f9, -176
; NOVEC-NEXT:    .cfi_offset %f11, -184
; NOVEC-NEXT:    la %r2, 160(%r15)
; NOVEC-NEXT:    ler %f8, %f2
; NOVEC-NEXT:    brasl %r14, __extendhftf2@PLT
; NOVEC-NEXT:    ld %f9, 160(%r15)
; NOVEC-NEXT:    ld %f11, 168(%r15)
; NOVEC-NEXT:    la %r2, 176(%r15)
; NOVEC-NEXT:    ler %f0, %f8
; NOVEC-NEXT:    brasl %r14, __extendhftf2@PLT
; NOVEC-NEXT:    ld %f0, 176(%r15)
; NOVEC-NEXT:    ld %f2, 184(%r15)
; NOVEC-NEXT:    la %r2, 192(%r15)
; NOVEC-NEXT:    axbr %f0, %f9
; NOVEC-NEXT:    std %f0, 192(%r15)
; NOVEC-NEXT:    std %f2, 200(%r15)
; NOVEC-NEXT:    brasl %r14, __trunctfhf2@PLT
; NOVEC-NEXT:    ld %f8, 224(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f9, 216(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f11, 208(%r15) # 8-byte Reload
; NOVEC-NEXT:    lmg %r14, %r15, 344(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun2:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -232
; VECTOR-NEXT:    .cfi_def_cfa_offset 392
; VECTOR-NEXT:    std %f8, 224(%r15) # 8-byte Spill
; VECTOR-NEXT:    .cfi_offset %f8, -168
; VECTOR-NEXT:    la %r2, 176(%r15)
; VECTOR-NEXT:    ldr %f8, %f2
; VECTOR-NEXT:    brasl %r14, __extendhftf2@PLT
; VECTOR-NEXT:    mvc 160(16,%r15), 176(%r15)
; VECTOR-NEXT:    la %r2, 192(%r15)
; VECTOR-NEXT:    ldr %f0, %f8
; VECTOR-NEXT:    brasl %r14, __extendhftf2@PLT
; VECTOR-NEXT:    vl %v0, 192(%r15), 3
; VECTOR-NEXT:    vl %v1, 160(%r15), 3 # 16-byte Reload
; VECTOR-NEXT:    wfaxb %v0, %v1, %v0
; VECTOR-NEXT:    la %r2, 208(%r15)
; VECTOR-NEXT:    vst %v0, 208(%r15), 3
; VECTOR-NEXT:    brasl %r14, __trunctfhf2@PLT
; VECTOR-NEXT:    ld %f8, 224(%r15) # 8-byte Reload
; VECTOR-NEXT:    lmg %r14, %r15, 344(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %E0 = fpext half %Op0 to fp128
  %E1 = fpext half %Op1 to fp128
  %Add = fadd fp128 %E0, %E1
  %Res = fptrunc fp128 %Add to half
  ret half %Res
}

; Test loading and storing a half value.
; No arithmetic is involved, so no library call is expected in either
; configuration.
define void @fun3(ptr %Src, ptr %Dst) {
; NOVEC-LABEL: fun3:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    lgh %r0, 0(%r2)
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    ldgr %f0, %r0
; NOVEC-NEXT:    lgdr %r0, %f0
; NOVEC-NEXT:    srlg %r0, %r0, 48
; NOVEC-NEXT:    sth %r0, 0(%r3)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun3:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    vlreph %v0, 0(%r2)
; VECTOR-NEXT:    vsteh %v0, 0(%r3), 0
; VECTOR-NEXT:    br %r14
entry:
  %L = load half, ptr %Src, align 2
  store half %L, ptr %Dst, align 2
  ret void
}

; Load a half, extend it to double, add it to itself, truncate the sum back to
; half and store it.
define void @fun4(ptr %Src, ptr %Dst) {
; NOVEC-LABEL: fun4:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r13, %r15, 104(%r15)
; NOVEC-NEXT:    .cfi_offset %r13, -56
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -160
; NOVEC-NEXT:    .cfi_def_cfa_offset 320
; NOVEC-NEXT:    lgh %r0, 0(%r2)
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    lgr %r13, %r3
; NOVEC-NEXT:    ldgr %f0, %r0
; NOVEC-NEXT:    # kill: def $f0h killed $f0h killed $f0d
; NOVEC-NEXT:    brasl %r14, __extendhfdf2@PLT
; NOVEC-NEXT:    adbr %f0, %f0
; NOVEC-NEXT:    brasl %r14, __truncdfhf2@PLT
; NOVEC-NEXT:    # kill: def $f0h killed $f0h def $f0d
; NOVEC-NEXT:    lgdr %r0, %f0
; NOVEC-NEXT:    srlg %r0, %r0, 48
; NOVEC-NEXT:    sth %r0, 0(%r13)
; NOVEC-NEXT:    lmg %r13, %r15, 264(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun4:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT:    .cfi_offset %r13, -56
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -160
; VECTOR-NEXT:    .cfi_def_cfa_offset 320
; VECTOR-NEXT:    vlreph %v0, 0(%r2)
; VECTOR-NEXT:    lgr %r13, %r3
; VECTOR-NEXT:    brasl %r14, __extendhfdf2@PLT
; VECTOR-NEXT:    adbr %f0, %f0
; VECTOR-NEXT:    brasl %r14, __truncdfhf2@PLT
; VECTOR-NEXT:    vsteh %v0, 0(%r13), 0
; VECTOR-NEXT:    lmg %r13, %r15, 264(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %Op0 = load half, ptr %Src, align 2
  %E0 = fpext half %Op0 to double
  %Add = fadd double %E0, %E0
  %Res = fptrunc double %Add to half
  store half %Res, ptr %Dst, align 2
  ret void
}

; Load a half, extend it to fp128, add it to itself, truncate the sum back to
; half and store it.
define void @fun5(ptr %Src, ptr %Dst) {
; NOVEC-LABEL: fun5:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r13, %r15, 104(%r15)
; NOVEC-NEXT:    .cfi_offset %r13, -56
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -192
; NOVEC-NEXT:    .cfi_def_cfa_offset 352
; NOVEC-NEXT:    lgh %r0, 0(%r2)
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    la %r2, 160(%r15)
; NOVEC-NEXT:    lgr %r13, %r3
; NOVEC-NEXT:    ldgr %f0, %r0
; NOVEC-NEXT:    # kill: def $f0h killed $f0h killed $f0d
; NOVEC-NEXT:    brasl %r14, __extendhftf2@PLT
; NOVEC-NEXT:    ld %f0, 160(%r15)
; NOVEC-NEXT:    ld %f2, 168(%r15)
; NOVEC-NEXT:    la %r2, 176(%r15)
; NOVEC-NEXT:    axbr %f0, %f0
; NOVEC-NEXT:    std %f0, 176(%r15)
; NOVEC-NEXT:    std %f2, 184(%r15)
; NOVEC-NEXT:    brasl %r14, __trunctfhf2@PLT
; NOVEC-NEXT:    # kill: def $f0h killed $f0h def $f0d
; NOVEC-NEXT:    lgdr %r0, %f0
; NOVEC-NEXT:    srlg %r0, %r0, 48
; NOVEC-NEXT:    sth %r0, 0(%r13)
; NOVEC-NEXT:    lmg %r13, %r15, 296(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun5:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT:    .cfi_offset %r13, -56
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -192
; VECTOR-NEXT:    .cfi_def_cfa_offset 352
; VECTOR-NEXT:    vlreph %v0, 0(%r2)
; VECTOR-NEXT:    la %r2, 160(%r15)
; VECTOR-NEXT:    lgr %r13, %r3
; VECTOR-NEXT:    brasl %r14, __extendhftf2@PLT
; VECTOR-NEXT:    vl %v0, 160(%r15), 3
; VECTOR-NEXT:    wfaxb %v0, %v0, %v0
; VECTOR-NEXT:    la %r2, 176(%r15)
; VECTOR-NEXT:    vst %v0, 176(%r15), 3
; VECTOR-NEXT:    brasl %r14, __trunctfhf2@PLT
; VECTOR-NEXT:    vsteh %v0, 0(%r13), 0
; VECTOR-NEXT:    lmg %r13, %r15, 296(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %Op0 = load half, ptr %Src, align 2
  %E0 = fpext half %Op0 to fp128
  %Add = fadd fp128 %E0, %E0
  %Res = fptrunc fp128 %Add to half
  store half %Res, ptr %Dst, align 2
  ret void
}

; Test a chain of half operations which should have each operation surrounded
; by conversions to/from fp32 for proper emulation.
; The intermediate sum is expected to be truncated to half and extended again
; before the second add.
define half @fun6(half %Op0, half %Op1, half %Op2) {
; NOVEC-LABEL: fun6:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -184
; NOVEC-NEXT:    .cfi_def_cfa_offset 344
; NOVEC-NEXT:    std %f8, 176(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f9, 168(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f10, 160(%r15) # 8-byte Spill
; NOVEC-NEXT:    .cfi_offset %f8, -168
; NOVEC-NEXT:    .cfi_offset %f9, -176
; NOVEC-NEXT:    .cfi_offset %f10, -184
; NOVEC-NEXT:    ler %f9, %f0
; NOVEC-NEXT:    ler %f0, %f2
; NOVEC-NEXT:    ler %f8, %f4
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    ler %f10, %f0
; NOVEC-NEXT:    ler %f0, %f9
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    aebr %f0, %f10
; NOVEC-NEXT:    brasl %r14, __truncsfhf2@PLT
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    ler %f9, %f0
; NOVEC-NEXT:    ler %f0, %f8
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    aebr %f0, %f9
; NOVEC-NEXT:    brasl %r14, __truncsfhf2@PLT
; NOVEC-NEXT:    ld %f8, 176(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f9, 168(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f10, 160(%r15) # 8-byte Reload
; NOVEC-NEXT:    lmg %r14, %r15, 296(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun6:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -184
; VECTOR-NEXT:    .cfi_def_cfa_offset 344
; VECTOR-NEXT:    std %f8, 176(%r15) # 8-byte Spill
; VECTOR-NEXT:    std %f9, 168(%r15) # 8-byte Spill
; VECTOR-NEXT:    std %f10, 160(%r15) # 8-byte Spill
; VECTOR-NEXT:    .cfi_offset %f8, -168
; VECTOR-NEXT:    .cfi_offset %f9, -176
; VECTOR-NEXT:    .cfi_offset %f10, -184
; VECTOR-NEXT:    ldr %f9, %f0
; VECTOR-NEXT:    ldr %f0, %f2
; VECTOR-NEXT:    ldr %f8, %f4
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    ldr %f10, %f0
; VECTOR-NEXT:    ldr %f0, %f9
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    aebr %f0, %f10
; VECTOR-NEXT:    brasl %r14, __truncsfhf2@PLT
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    ldr %f9, %f0
; VECTOR-NEXT:    ldr %f0, %f8
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    wfasb %f0, %f9, %f0
; VECTOR-NEXT:    brasl %r14, __truncsfhf2@PLT
; VECTOR-NEXT:    ld %f8, 176(%r15) # 8-byte Reload
; VECTOR-NEXT:    ld %f9, 168(%r15) # 8-byte Reload
; VECTOR-NEXT:    ld %f10, 160(%r15) # 8-byte Reload
; VECTOR-NEXT:    lmg %r14, %r15, 296(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %A0 = fadd half %Op0, %Op1
  %Res = fadd half %A0, %Op2
  ret half %Res
}

; Store an incoming half argument and return a loaded one.
define half @fun7(half %Op0, ptr %Dst, ptr %Src) {
; NOVEC-LABEL: fun7:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    # kill: def $f0h killed $f0h def $f0d
; NOVEC-NEXT:    lgdr %r0, %f0
; NOVEC-NEXT:    srlg %r0, %r0, 48
; NOVEC-NEXT:    sth %r0, 0(%r2)
; NOVEC-NEXT:    lgh %r0, 0(%r3)
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    ldgr %f0, %r0
; NOVEC-NEXT:    # kill: def $f0h killed $f0h killed $f0d
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun7:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    vsteh %v0, 0(%r2), 0
; VECTOR-NEXT:    vlreph %v0, 0(%r3)
; VECTOR-NEXT:    br %r14
entry:
  store half %Op0, ptr %Dst
  %Res = load half, ptr %Src
  ret half %Res
}

; Call a function with half argument and return values.
; The argument is loaded from %Src and the returned value is stored to %Dst.
declare half @foo(half)
define void @fun8(ptr %Src, ptr %Dst) {
; NOVEC-LABEL: fun8:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r13, %r15, 104(%r15)
; NOVEC-NEXT:    .cfi_offset %r13, -56
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -160
; NOVEC-NEXT:    .cfi_def_cfa_offset 320
; NOVEC-NEXT:    lgh %r0, 0(%r2)
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    lgr %r13, %r3
; NOVEC-NEXT:    ldgr %f0, %r0
; NOVEC-NEXT:    # kill: def $f0h killed $f0h killed $f0d
; NOVEC-NEXT:    brasl %r14, foo@PLT
; NOVEC-NEXT:    # kill: def $f0h killed $f0h def $f0d
; NOVEC-NEXT:    lgdr %r0, %f0
; NOVEC-NEXT:    srlg %r0, %r0, 48
; NOVEC-NEXT:    sth %r0, 0(%r13)
; NOVEC-NEXT:    lmg %r13, %r15, 264(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun8:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r13, %r15, 104(%r15)
; VECTOR-NEXT:    .cfi_offset %r13, -56
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -160
; VECTOR-NEXT:    .cfi_def_cfa_offset 320
; VECTOR-NEXT:    vlreph %v0, 0(%r2)
; VECTOR-NEXT:    lgr %r13, %r3
; VECTOR-NEXT:    brasl %r14, foo@PLT
; VECTOR-NEXT:    vsteh %v0, 0(%r13), 0
; VECTOR-NEXT:    lmg %r13, %r15, 264(%r15)
; VECTOR-NEXT:    br %r14
entry:
  %arg = load half, ptr %Src
  %Res = call half @foo(half %arg)
  store half %Res, ptr %Dst
  ret void
}

; Receive stack argument.
; Only %Arg3 and %Arg4 are used; the expected output reads one of them from
; the stack at 342(%r15) and the other from %f6.
define half @fun9(half %Arg0, half %Arg1, half %Arg2, half %Arg3, half %Arg4) {
; NOVEC-LABEL: fun9:
; NOVEC:       # %bb.0:
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -176
; NOVEC-NEXT:    .cfi_def_cfa_offset 336
; NOVEC-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; NOVEC-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; NOVEC-NEXT:    .cfi_offset %f8, -168
; NOVEC-NEXT:    .cfi_offset %f9, -176
; NOVEC-NEXT:    lgh %r0, 342(%r15)
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    ler %f8, %f6
; NOVEC-NEXT:    ldgr %f0, %r0
; NOVEC-NEXT:    # kill: def $f0h killed $f0h killed $f0d
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    ler %f9, %f0
; NOVEC-NEXT:    ler %f0, %f8
; NOVEC-NEXT:    brasl %r14, __extendhfsf2@PLT
; NOVEC-NEXT:    aebr %f0, %f9
; NOVEC-NEXT:    brasl %r14, __truncsfhf2@PLT
; NOVEC-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; NOVEC-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; NOVEC-NEXT:    lmg %r14, %r15, 288(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun9:
; VECTOR:       # %bb.0:
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -176
; VECTOR-NEXT:    .cfi_def_cfa_offset 336
; VECTOR-NEXT:    std %f8, 168(%r15) # 8-byte Spill
; VECTOR-NEXT:    std %f9, 160(%r15) # 8-byte Spill
; VECTOR-NEXT:    .cfi_offset %f8, -168
; VECTOR-NEXT:    .cfi_offset %f9, -176
; VECTOR-NEXT:    vlreph %v0, 342(%r15)
; VECTOR-NEXT:    ldr %f8, %f6
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    ldr %f9, %f0
; VECTOR-NEXT:    ldr %f0, %f8
; VECTOR-NEXT:    brasl %r14, __extendhfsf2@PLT
; VECTOR-NEXT:    aebr %f0, %f9
; VECTOR-NEXT:    brasl %r14, __truncsfhf2@PLT
; VECTOR-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
; VECTOR-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
; VECTOR-NEXT:    lmg %r14, %r15, 288(%r15)
; VECTOR-NEXT:    br %r14
  %A0 = fadd half %Arg3, %Arg4
  ret half %A0
}

; Pass stack argument.
; The same incoming half is passed five times; the expected output places
; four copies in %f0/%f2/%f4/%f6 and stores the fifth at 166(%r15).
define void @fun10(half %Arg0) {
; NOVEC-LABEL: fun10:
; NOVEC:       # %bb.0:
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -168
; NOVEC-NEXT:    .cfi_def_cfa_offset 328
; NOVEC-NEXT:    # kill: def $f0h killed $f0h def $f0d
; NOVEC-NEXT:    lgdr %r0, %f0
; NOVEC-NEXT:    srlg %r0, %r0, 48
; NOVEC-NEXT:    ler %f2, %f0
; NOVEC-NEXT:    ler %f4, %f0
; NOVEC-NEXT:    ler %f6, %f0
; NOVEC-NEXT:    sth %r0, 166(%r15)
; NOVEC-NEXT:    brasl %r14, fun9@PLT
; NOVEC-NEXT:    lmg %r14, %r15, 280(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun10:
; VECTOR:       # %bb.0:
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -168
; VECTOR-NEXT:    .cfi_def_cfa_offset 328
; VECTOR-NEXT:    ldr %f2, %f0
; VECTOR-NEXT:    ldr %f4, %f0
; VECTOR-NEXT:    ldr %f6, %f0
; VECTOR-NEXT:    vsteh %v0, 166(%r15), 0
; VECTOR-NEXT:    brasl %r14, fun9@PLT
; VECTOR-NEXT:    lmg %r14, %r15, 280(%r15)
; VECTOR-NEXT:    br %r14
  ; NOTE(review): @fun9 is defined in this file with a half return type, but
  ; this call site uses a void return type; confirm the mismatch is intended.
  call void @fun9(half %Arg0, half %Arg0, half %Arg0, half %Arg0, half %Arg0)
  ret void
}

; Test loading some immediates from the Constant Pool.
; The call passes -0.0, 0.0, 0.375 and 1.0 as half arguments.
declare void @foo2(half, half, half, half)
define void @fun11() {
; NOVEC-LABEL: fun11:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    stmg %r14, %r15, 112(%r15)
; NOVEC-NEXT:    .cfi_offset %r14, -48
; NOVEC-NEXT:    .cfi_offset %r15, -40
; NOVEC-NEXT:    aghi %r15, -160
; NOVEC-NEXT:    .cfi_def_cfa_offset 320
; NOVEC-NEXT:    lghrl %r0, .LCPI11_0
; NOVEC-NEXT:    lghrl %r1, .LCPI11_1
; NOVEC-NEXT:    lzer %f2
; NOVEC-NEXT:    lcdfr %f0, %f2
; NOVEC-NEXT:    sllg %r0, %r0, 48
; NOVEC-NEXT:    sllg %r1, %r1, 48
; NOVEC-NEXT:    ldgr %f4, %r0
; NOVEC-NEXT:    # kill: def $f4h killed $f4h killed $f4d
; NOVEC-NEXT:    ldgr %f6, %r1
; NOVEC-NEXT:    # kill: def $f6h killed $f6h killed $f6d
; NOVEC-NEXT:    brasl %r14, foo2@PLT
; NOVEC-NEXT:    lmg %r14, %r15, 272(%r15)
; NOVEC-NEXT:    br %r14
;
; VECTOR-LABEL: fun11:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    stmg %r14, %r15, 112(%r15)
; VECTOR-NEXT:    .cfi_offset %r14, -48
; VECTOR-NEXT:    .cfi_offset %r15, -40
; VECTOR-NEXT:    aghi %r15, -160
; VECTOR-NEXT:    .cfi_def_cfa_offset 320
; VECTOR-NEXT:    lzer %f2
; VECTOR-NEXT:    vrepih %v4, 13824
; VECTOR-NEXT:    vrepih %v6, 15360
; VECTOR-NEXT:    lcdfr %f0, %f2
; VECTOR-NEXT:    brasl %r14, foo2@PLT
; VECTOR-NEXT:    lmg %r14, %r15, 272(%r15)
; VECTOR-NEXT:    br %r14
entry:
  call void @foo2(half -0.0, half 0.0, half 0.375, half 1.0)
  ret void
}

; Test a tail call.
; The incoming half argument is forwarded unchanged, so the expected output is
; a single jump to the callee.
declare void @foo3(half)
define void @fun12(half %Arg0) {
; NOVEC-LABEL: fun12:
; NOVEC:       # %bb.0: # %entry
; NOVEC-NEXT:    jg foo3@PLT
;
; VECTOR-LABEL: fun12:
; VECTOR:       # %bb.0: # %entry
; VECTOR-NEXT:    jg foo3@PLT
entry:
  tail call void @foo3(half %Arg0)
  ret void
}