- Make v8f16 a legal type so that arguments can be passed in vector registers. Handle fp16 vectors so that they have the same ABI as other fp vectors.
- Set the preferred vector action for fp16 vectors to "split". This will scalarize all operations, which is not always necessary (e.g. for memory operations), but it avoids the superfluous operations that result from first widening and then scalarizing a narrow vector (such as v4f16).

Fixes #168992
520 lines
21 KiB
LLVM
520 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s --check-prefix=VECTOR
|
|
;
|
|
; Test some fp16 vector operations, which must be scalarized. With fewer than
|
|
; 8 elements there should only be operations emitted for the used elements.
|
|
|
|
; Vector type used by @fun0: eight half elements (8 x 16 bits = 16 bytes).
%Ty0 = type <8 x half>
|
|
; fun0: fadd of two <8 x half> vectors.
;
; The IR at the end of the function loads one vector from %Src and a second
; one from the address one vector further on, adds them, and stores the sum
; to %Dst.
;
; CHECK prefix (RUN line without -mcpu): all 16 half elements are loaded
; individually with lgh and moved into FPRs or spill slots. Each of the eight
; element additions is two __extendhfsf2 calls, an aebr and a __truncsfhf2
; call, and every result is stored with its own sth.
;
; VECTOR prefix (-mcpu=z16): both inputs are kept as whole 16-byte vectors in
; stack slots. Elements 1-7 are selected with vreph; element 0 is used
; directly. The scalar results are merged back with vmrhh/vmrhf/vmrhg and
; written with a single vst.
;
; NOTE: the assertions below are autogenerated (see the NOTE line at the top
; of the file); regenerate them with the script rather than editing by hand.
define void @fun0(ptr %Src, ptr %Dst) {
|
|
; CHECK-LABEL: fun0:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
|
|
; CHECK-NEXT: .cfi_offset %r13, -56
|
|
; CHECK-NEXT: .cfi_offset %r14, -48
|
|
; CHECK-NEXT: .cfi_offset %r15, -40
|
|
; CHECK-NEXT: aghi %r15, -288
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 448
|
|
; CHECK-NEXT: std %f8, 280(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f9, 272(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f10, 264(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f11, 256(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f12, 248(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f13, 240(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f14, 232(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f15, 224(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: .cfi_offset %f8, -168
|
|
; CHECK-NEXT: .cfi_offset %f9, -176
|
|
; CHECK-NEXT: .cfi_offset %f10, -184
|
|
; CHECK-NEXT: .cfi_offset %f11, -192
|
|
; CHECK-NEXT: .cfi_offset %f12, -200
|
|
; CHECK-NEXT: .cfi_offset %f13, -208
|
|
; CHECK-NEXT: .cfi_offset %f14, -216
|
|
; CHECK-NEXT: .cfi_offset %f15, -224
|
|
; CHECK-NEXT: lgh %r0, 14(%r2)
|
|
; CHECK-NEXT: lgr %r13, %r3
|
|
; CHECK-NEXT: lgh %r1, 12(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: stg %r0, 216(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r0, 10(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: stg %r1, 208(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r1, 8(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: stg %r0, 200(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r0, 6(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: stg %r1, 192(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r1, 4(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: stg %r0, 176(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r0, 2(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: stg %r1, 160(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r1, 0(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: ldgr %f8, %r0
|
|
; CHECK-NEXT: lgh %r0, 30(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: ldgr %f13, %r1
|
|
; CHECK-NEXT: lgh %r1, 28(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: stg %r0, 184(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r0, 26(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: stg %r1, 168(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: lgh %r1, 24(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: lgh %r3, 22(%r2)
|
|
; CHECK-NEXT: ldgr %f10, %r0
|
|
; CHECK-NEXT: sllg %r0, %r1, 48
|
|
; CHECK-NEXT: ldgr %f11, %r0
|
|
; CHECK-NEXT: sllg %r0, %r3, 48
|
|
; CHECK-NEXT: lgh %r1, 20(%r2)
|
|
; CHECK-NEXT: ldgr %f12, %r0
|
|
; CHECK-NEXT: lgh %r0, 18(%r2)
|
|
; CHECK-NEXT: lgh %r2, 16(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: ldgr %f14, %r1
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: sllg %r1, %r2, 48
|
|
; CHECK-NEXT: ldgr %f0, %r1
|
|
; CHECK-NEXT: ldgr %f15, %r0
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f9, %f0
|
|
; CHECK-NEXT: ler %f0, %f13
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f9
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f13, %f0
|
|
; CHECK-NEXT: ler %f0, %f15
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f9, %f0
|
|
; CHECK-NEXT: ler %f0, %f8
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f9
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f8, %f0
|
|
; CHECK-NEXT: ler %f0, %f14
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f9, %f0
|
|
; CHECK-NEXT: ld %f0, 160(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f9
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f9, %f0
|
|
; CHECK-NEXT: ler %f0, %f12
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f12, %f0
|
|
; CHECK-NEXT: ld %f0, 176(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f12
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f14, %f0
|
|
; CHECK-NEXT: ler %f0, %f11
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f11, %f0
|
|
; CHECK-NEXT: ld %f0, 192(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f11
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f11, %f0
|
|
; CHECK-NEXT: ler %f0, %f10
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f10, %f0
|
|
; CHECK-NEXT: ld %f0, 200(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f10
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f10, %f0
|
|
; CHECK-NEXT: ld %f0, 168(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f12, %f0
|
|
; CHECK-NEXT: ld %f0, 208(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f12
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f12, %f0
|
|
; CHECK-NEXT: ld %f0, 184(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f15, %f0
|
|
; CHECK-NEXT: ld %f0, 216(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: aebr %f0, %f15
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d
|
|
; CHECK-NEXT: lgdr %r0, %f0
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 14(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f12
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 12(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f10
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 10(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f11
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 8(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f14
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 6(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f9
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 4(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f8
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 2(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f13
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 0(%r13)
|
|
; CHECK-NEXT: ld %f8, 280(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f9, 272(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f10, 264(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f11, 256(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f12, 248(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f13, 240(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f14, 232(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f15, 224(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: lmg %r13, %r15, 392(%r15)
|
|
; CHECK-NEXT: br %r14
|
|
;
|
|
; VECTOR-LABEL: fun0:
|
|
; VECTOR: # %bb.0:
|
|
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
|
|
; VECTOR-NEXT: .cfi_offset %r13, -56
|
|
; VECTOR-NEXT: .cfi_offset %r14, -48
|
|
; VECTOR-NEXT: .cfi_offset %r15, -40
|
|
; VECTOR-NEXT: aghi %r15, -248
|
|
; VECTOR-NEXT: .cfi_def_cfa_offset 408
|
|
; VECTOR-NEXT: std %f8, 240(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: .cfi_offset %f8, -168
|
|
; VECTOR-NEXT: vl %v0, 16(%r2), 3
|
|
; VECTOR-NEXT: mvc 160(16,%r15), 0(%r2) # 16-byte Folded Spill
|
|
; VECTOR-NEXT: lgr %r13, %r3
|
|
; VECTOR-NEXT: vst %v0, 176(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vreph %v0, %v0, 7
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 7
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 6
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 6
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vmrhh %v0, %v0, %v1
|
|
; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 5
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 5
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 4
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 4
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vmrhh %v0, %v0, %v1
|
|
; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vmrhf %v0, %v0, %v1
|
|
; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 3
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 3
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 2
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 2
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vmrhh %v0, %v0, %v1
|
|
; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vst %v0, 224(%r15), 3 # 16-byte Spill
|
|
; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 1
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f8, %f0
|
|
; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vreph %v0, %v0, 1
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: aebr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: vl %v1, 224(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0
|
|
; VECTOR-NEXT: vmrhh %v0, %v1, %v0
|
|
; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: vmrhf %v0, %v0, %v1
|
|
; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload
|
|
; VECTOR-NEXT: ld %f8, 240(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: vmrhg %v0, %v0, %v1
|
|
; VECTOR-NEXT: vst %v0, 0(%r13), 3
|
|
; VECTOR-NEXT: lmg %r13, %r15, 352(%r15)
|
|
; VECTOR-NEXT: br %r14
|
|
; First operand: the <8 x half> vector at %Src.
%LHS = load %Ty0, ptr %Src
|
|
; Second operand: index 1 of type %Ty0 from %Src, i.e. the vector that
; immediately follows %LHS in memory (byte offsets 16..30 in the checks).
%S2 = getelementptr %Ty0, ptr %Src, i32 1
|
|
%RHS = load %Ty0, ptr %S2
|
|
; The fp16 vector operation under test.
%Res = fadd %Ty0 %LHS, %RHS
|
|
store %Ty0 %Res, ptr %Dst
|
|
ret void
|
|
}
|
|
|
|
; Vector type used by @fun1: four half elements (4 x 16 bits = 8 bytes).
%Ty1 = type <4 x half>
|
|
; fun1: fsub of two <4 x half> vectors.
;
; The IR at the end of the function loads one vector from %Src and a second
; one from the address one vector further on, subtracts the second from the
; first, and stores the difference to %Dst.
;
; Both prefixes expect exactly four element operations (four sebr, each fed by
; two __extendhfsf2 calls and followed by one __truncsfhf2 call).
;
; CHECK prefix (RUN line without -mcpu): the eight half elements are loaded
; with lgh and the four results are stored with sth.
;
; VECTOR prefix (-mcpu=z16): the eight half elements are loaded individually
; with vlreph and the four results are stored with vsteh.
;
; NOTE: the assertions below are autogenerated (see the NOTE line at the top
; of the file); regenerate them with the script rather than editing by hand.
define void @fun1(ptr %Src, ptr %Dst) {
|
|
; CHECK-LABEL: fun1:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: stmg %r13, %r15, 104(%r15)
|
|
; CHECK-NEXT: .cfi_offset %r13, -56
|
|
; CHECK-NEXT: .cfi_offset %r14, -48
|
|
; CHECK-NEXT: .cfi_offset %r15, -40
|
|
; CHECK-NEXT: aghi %r15, -224
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 384
|
|
; CHECK-NEXT: std %f8, 216(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f9, 208(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f10, 200(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f11, 192(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f12, 184(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f13, 176(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f14, 168(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: std %f15, 160(%r15) # 8-byte Spill
|
|
; CHECK-NEXT: .cfi_offset %f8, -168
|
|
; CHECK-NEXT: .cfi_offset %f9, -176
|
|
; CHECK-NEXT: .cfi_offset %f10, -184
|
|
; CHECK-NEXT: .cfi_offset %f11, -192
|
|
; CHECK-NEXT: .cfi_offset %f12, -200
|
|
; CHECK-NEXT: .cfi_offset %f13, -208
|
|
; CHECK-NEXT: .cfi_offset %f14, -216
|
|
; CHECK-NEXT: .cfi_offset %f15, -224
|
|
; CHECK-NEXT: lgh %r0, 6(%r2)
|
|
; CHECK-NEXT: lgr %r13, %r3
|
|
; CHECK-NEXT: lgh %r1, 4(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: ldgr %f8, %r0
|
|
; CHECK-NEXT: lgh %r0, 2(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: ldgr %f9, %r1
|
|
; CHECK-NEXT: lgh %r1, 0(%r2)
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: lgh %r3, 14(%r2)
|
|
; CHECK-NEXT: ldgr %f12, %r0
|
|
; CHECK-NEXT: sllg %r0, %r1, 48
|
|
; CHECK-NEXT: ldgr %f10, %r0
|
|
; CHECK-NEXT: sllg %r0, %r3, 48
|
|
; CHECK-NEXT: lgh %r1, 12(%r2)
|
|
; CHECK-NEXT: ldgr %f11, %r0
|
|
; CHECK-NEXT: lgh %r0, 10(%r2)
|
|
; CHECK-NEXT: lgh %r2, 8(%r2)
|
|
; CHECK-NEXT: sllg %r1, %r1, 48
|
|
; CHECK-NEXT: ldgr %f13, %r1
|
|
; CHECK-NEXT: sllg %r0, %r0, 48
|
|
; CHECK-NEXT: sllg %r1, %r2, 48
|
|
; CHECK-NEXT: ldgr %f0, %r1
|
|
; CHECK-NEXT: ldgr %f14, %r0
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f15, %f0
|
|
; CHECK-NEXT: ler %f0, %f10
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: sebr %f0, %f15
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f10, %f0
|
|
; CHECK-NEXT: ler %f0, %f14
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f14, %f0
|
|
; CHECK-NEXT: ler %f0, %f12
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: sebr %f0, %f14
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f12, %f0
|
|
; CHECK-NEXT: ler %f0, %f13
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f13, %f0
|
|
; CHECK-NEXT: ler %f0, %f9
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: sebr %f0, %f13
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: ler %f9, %f0
|
|
; CHECK-NEXT: ler %f0, %f11
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: ler %f11, %f0
|
|
; CHECK-NEXT: ler %f0, %f8
|
|
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; CHECK-NEXT: sebr %f0, %f11
|
|
; CHECK-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d
|
|
; CHECK-NEXT: lgdr %r0, %f0
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 6(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f9
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 4(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f12
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 2(%r13)
|
|
; CHECK-NEXT: lgdr %r0, %f10
|
|
; CHECK-NEXT: srlg %r0, %r0, 48
|
|
; CHECK-NEXT: sth %r0, 0(%r13)
|
|
; CHECK-NEXT: ld %f8, 216(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f9, 208(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f10, 200(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f11, 192(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f12, 184(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f13, 176(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f14, 168(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: ld %f15, 160(%r15) # 8-byte Reload
|
|
; CHECK-NEXT: lmg %r13, %r15, 328(%r15)
|
|
; CHECK-NEXT: br %r14
|
|
;
|
|
; VECTOR-LABEL: fun1:
|
|
; VECTOR: # %bb.0:
|
|
; VECTOR-NEXT: stmg %r13, %r15, 104(%r15)
|
|
; VECTOR-NEXT: .cfi_offset %r13, -56
|
|
; VECTOR-NEXT: .cfi_offset %r14, -48
|
|
; VECTOR-NEXT: .cfi_offset %r15, -40
|
|
; VECTOR-NEXT: aghi %r15, -224
|
|
; VECTOR-NEXT: .cfi_def_cfa_offset 384
|
|
; VECTOR-NEXT: std %f8, 216(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f9, 208(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f10, 200(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f11, 192(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f12, 184(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f13, 176(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f14, 168(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: std %f15, 160(%r15) # 8-byte Spill
|
|
; VECTOR-NEXT: .cfi_offset %f8, -168
|
|
; VECTOR-NEXT: .cfi_offset %f9, -176
|
|
; VECTOR-NEXT: .cfi_offset %f10, -184
|
|
; VECTOR-NEXT: .cfi_offset %f11, -192
|
|
; VECTOR-NEXT: .cfi_offset %f12, -200
|
|
; VECTOR-NEXT: .cfi_offset %f13, -208
|
|
; VECTOR-NEXT: .cfi_offset %f14, -216
|
|
; VECTOR-NEXT: .cfi_offset %f15, -224
|
|
; VECTOR-NEXT: vlreph %v0, 8(%r2)
|
|
; VECTOR-NEXT: vlreph %v8, 6(%r2)
|
|
; VECTOR-NEXT: vlreph %v9, 4(%r2)
|
|
; VECTOR-NEXT: vlreph %v10, 2(%r2)
|
|
; VECTOR-NEXT: lgr %r13, %r3
|
|
; VECTOR-NEXT: vlreph %v11, 0(%r2)
|
|
; VECTOR-NEXT: vlreph %v12, 14(%r2)
|
|
; VECTOR-NEXT: vlreph %v13, 12(%r2)
|
|
; VECTOR-NEXT: vlreph %v14, 10(%r2)
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f15, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f11
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: sebr %f0, %f15
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: ldr %f11, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f14
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f14, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f10
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: sebr %f0, %f14
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: ldr %f10, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f13
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f13, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f9
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: sebr %f0, %f13
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: ldr %f9, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f12
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: ldr %f12, %f0
|
|
; VECTOR-NEXT: ldr %f0, %f8
|
|
; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT
|
|
; VECTOR-NEXT: sebr %f0, %f12
|
|
; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT
|
|
; VECTOR-NEXT: vsteh %v9, 4(%r13), 0
|
|
; VECTOR-NEXT: vsteh %v10, 2(%r13), 0
|
|
; VECTOR-NEXT: vsteh %v11, 0(%r13), 0
|
|
; VECTOR-NEXT: ld %f8, 216(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f9, 208(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f10, 200(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f11, 192(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f12, 184(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f13, 176(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f14, 168(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: ld %f15, 160(%r15) # 8-byte Reload
|
|
; VECTOR-NEXT: vsteh %v0, 6(%r13), 0
|
|
; VECTOR-NEXT: lmg %r13, %r15, 328(%r15)
|
|
; VECTOR-NEXT: br %r14
|
|
; First operand (minuend): the <4 x half> vector at %Src.
%LHS = load %Ty1, ptr %Src
|
|
; Second operand (subtrahend): index 1 of type %Ty1 from %Src, i.e. the
; vector that immediately follows %LHS in memory (byte offsets 8..14 in the
; checks).
%S2 = getelementptr %Ty1, ptr %Src, i32 1
|
|
%RHS = load %Ty1, ptr %S2
|
|
; The fp16 vector operation under test.
%Res = fsub %Ty1 %LHS, %RHS
|
|
store %Ty1 %Res, ptr %Dst
|
|
ret void
|
|
}
|