; Reduce symbol table size for libraries. Object linking will more than likely divert from using symbols directly. Fixes ROCM-14907
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s
; Functions that don't make calls should have constants as their resource usage, since no resource information has to be propagated.
|
; GCN-LABEL: {{^}}use_vcc:
; GCN: .set .Luse_vcc.num_vgpr, 0
; GCN: .set .Luse_vcc.num_agpr, 0
; GCN: .set .Luse_vcc.numbered_sgpr, 32
; GCN: .set .Luse_vcc.private_seg_size, 0
; GCN: .set .Luse_vcc.uses_vcc, 1
; GCN: .set .Luse_vcc.uses_flat_scratch, 0
; GCN: .set .Luse_vcc.has_dyn_sized_stack, 0
; GCN: .set .Luse_vcc.has_recursion, 0
; GCN: .set .Luse_vcc.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
; Leaf function that clobbers vcc via inline asm: all .set values are plain constants.
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: .set .Lindirect_use_vcc.num_vgpr, max(41, .Luse_vcc.num_vgpr)
; GCN: .set .Lindirect_use_vcc.num_agpr, max(0, .Luse_vcc.num_agpr)
; GCN: .set .Lindirect_use_vcc.numbered_sgpr, max(34, .Luse_vcc.numbered_sgpr)
; GCN: .set .Lindirect_use_vcc.private_seg_size, 16+max(.Luse_vcc.private_seg_size)
; GCN: .set .Lindirect_use_vcc.uses_vcc, or(1, .Luse_vcc.uses_vcc)
; GCN: .set .Lindirect_use_vcc.uses_flat_scratch, or(0, .Luse_vcc.uses_flat_scratch)
; GCN: .set .Lindirect_use_vcc.has_dyn_sized_stack, or(0, .Luse_vcc.has_dyn_sized_stack)
; GCN: .set .Lindirect_use_vcc.has_recursion, or(0, .Luse_vcc.has_recursion)
; GCN: .set .Lindirect_use_vcc.has_indirect_call, or(0, .Luse_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Calls a known function: usage is expressed symbolically in terms of the callee's .set symbols.
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: .set .Lindirect_2level_use_vcc_kernel.num_vgpr, max(32, .Lindirect_use_vcc.num_vgpr)
; GCN: .set .Lindirect_2level_use_vcc_kernel.num_agpr, max(0, .Lindirect_use_vcc.num_agpr)
; GCN: .set .Lindirect_2level_use_vcc_kernel.numbered_sgpr, max(33, .Lindirect_use_vcc.numbered_sgpr)
; GCN: .set .Lindirect_2level_use_vcc_kernel.private_seg_size, 0+max(.Lindirect_use_vcc.private_seg_size)
; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_vcc, or(1, .Lindirect_use_vcc.uses_vcc)
; GCN: .set .Lindirect_2level_use_vcc_kernel.uses_flat_scratch, or(1, .Lindirect_use_vcc.uses_flat_scratch)
; GCN: .set .Lindirect_2level_use_vcc_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_vcc.has_dyn_sized_stack)
; GCN: .set .Lindirect_2level_use_vcc_kernel.has_recursion, or(0, .Lindirect_use_vcc.has_recursion)
; GCN: .set .Lindirect_2level_use_vcc_kernel.has_indirect_call, or(0, .Lindirect_use_vcc.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Kernel entry two call levels above the vcc clobber; usage chains through .Lindirect_use_vcc.
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_vcc()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_flat_scratch:
; GCN: .set .Luse_flat_scratch.num_vgpr, 0
; GCN: .set .Luse_flat_scratch.num_agpr, 0
; GCN: .set .Luse_flat_scratch.numbered_sgpr, 32
; GCN: .set .Luse_flat_scratch.private_seg_size, 0
; GCN: .set .Luse_flat_scratch.uses_vcc, 0
; GCN: .set .Luse_flat_scratch.uses_flat_scratch, 1
; GCN: .set .Luse_flat_scratch.has_dyn_sized_stack, 0
; GCN: .set .Luse_flat_scratch.has_recursion, 0
; GCN: .set .Luse_flat_scratch.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
; Leaf function that clobbers flat_scratch via inline asm: constant resource values.
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; GCN: .set .Lindirect_use_flat_scratch.num_vgpr, max(41, .Luse_flat_scratch.num_vgpr)
; GCN: .set .Lindirect_use_flat_scratch.num_agpr, max(0, .Luse_flat_scratch.num_agpr)
; GCN: .set .Lindirect_use_flat_scratch.numbered_sgpr, max(34, .Luse_flat_scratch.numbered_sgpr)
; GCN: .set .Lindirect_use_flat_scratch.private_seg_size, 16+max(.Luse_flat_scratch.private_seg_size)
; GCN: .set .Lindirect_use_flat_scratch.uses_vcc, or(1, .Luse_flat_scratch.uses_vcc)
; GCN: .set .Lindirect_use_flat_scratch.uses_flat_scratch, or(0, .Luse_flat_scratch.uses_flat_scratch)
; GCN: .set .Lindirect_use_flat_scratch.has_dyn_sized_stack, or(0, .Luse_flat_scratch.has_dyn_sized_stack)
; GCN: .set .Lindirect_use_flat_scratch.has_recursion, or(0, .Luse_flat_scratch.has_recursion)
; GCN: .set .Lindirect_use_flat_scratch.has_indirect_call, or(0, .Luse_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Single call level above the flat_scratch clobber; usage chains via the callee's symbols.
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_vgpr, max(32, .Lindirect_use_flat_scratch.num_vgpr)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.num_agpr, max(0, .Lindirect_use_flat_scratch.num_agpr)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.numbered_sgpr, max(33, .Lindirect_use_flat_scratch.numbered_sgpr)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.private_seg_size, 0+max(.Lindirect_use_flat_scratch.private_seg_size)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_vcc, or(1, .Lindirect_use_flat_scratch.uses_vcc)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.uses_flat_scratch, or(1, .Lindirect_use_flat_scratch.uses_flat_scratch)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_dyn_sized_stack, or(0, .Lindirect_use_flat_scratch.has_dyn_sized_stack)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_recursion, or(0, .Lindirect_use_flat_scratch.has_recursion)
; GCN: .set .Lindirect_2level_use_flat_scratch_kernel.has_indirect_call, or(0, .Lindirect_use_flat_scratch.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Kernel entry two call levels above the flat_scratch clobber.
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: .set .Luse_10_vgpr.num_vgpr, 10
; GCN: .set .Luse_10_vgpr.num_agpr, 0
; GCN: .set .Luse_10_vgpr.numbered_sgpr, 32
; GCN: .set .Luse_10_vgpr.private_seg_size, 0
; GCN: .set .Luse_10_vgpr.uses_vcc, 0
; GCN: .set .Luse_10_vgpr.uses_flat_scratch, 0
; GCN: .set .Luse_10_vgpr.has_dyn_sized_stack, 0
; GCN: .set .Luse_10_vgpr.has_recursion, 0
; GCN: .set .Luse_10_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 10
; GCN: ScratchSize: 0
; Leaf function clobbering v0-v9 via inline asm, giving a constant VGPR count of 10.
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: .set .Lindirect_use_10_vgpr.num_vgpr, max(41, .Luse_10_vgpr.num_vgpr)
; GCN: .set .Lindirect_use_10_vgpr.num_agpr, max(0, .Luse_10_vgpr.num_agpr)
; GCN: .set .Lindirect_use_10_vgpr.numbered_sgpr, max(34, .Luse_10_vgpr.numbered_sgpr)
; GCN: .set .Lindirect_use_10_vgpr.private_seg_size, 16+max(.Luse_10_vgpr.private_seg_size)
; GCN: .set .Lindirect_use_10_vgpr.uses_vcc, or(1, .Luse_10_vgpr.uses_vcc)
; GCN: .set .Lindirect_use_10_vgpr.uses_flat_scratch, or(0, .Luse_10_vgpr.uses_flat_scratch)
; GCN: .set .Lindirect_use_10_vgpr.has_dyn_sized_stack, or(0, .Luse_10_vgpr.has_dyn_sized_stack)
; GCN: .set .Lindirect_use_10_vgpr.has_recursion, or(0, .Luse_10_vgpr.has_recursion)
; GCN: .set .Lindirect_use_10_vgpr.has_indirect_call, or(0, .Luse_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Single call level above use_10_vgpr; usage chains via the callee's .set symbols.
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: .set .Lindirect_2_level_use_10_vgpr.num_vgpr, max(32, .Lindirect_use_10_vgpr.num_vgpr)
; GCN: .set .Lindirect_2_level_use_10_vgpr.num_agpr, max(0, .Lindirect_use_10_vgpr.num_agpr)
; GCN: .set .Lindirect_2_level_use_10_vgpr.numbered_sgpr, max(33, .Lindirect_use_10_vgpr.numbered_sgpr)
; GCN: .set .Lindirect_2_level_use_10_vgpr.private_seg_size, 0+max(.Lindirect_use_10_vgpr.private_seg_size)
; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_vcc, or(1, .Lindirect_use_10_vgpr.uses_vcc)
; GCN: .set .Lindirect_2_level_use_10_vgpr.uses_flat_scratch, or(1, .Lindirect_use_10_vgpr.uses_flat_scratch)
; GCN: .set .Lindirect_2_level_use_10_vgpr.has_dyn_sized_stack, or(0, .Lindirect_use_10_vgpr.has_dyn_sized_stack)
; GCN: .set .Lindirect_2_level_use_10_vgpr.has_recursion, or(0, .Lindirect_use_10_vgpr.has_recursion)
; GCN: .set .Lindirect_2_level_use_10_vgpr.has_indirect_call, or(0, .Lindirect_use_10_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Kernel entry two call levels above use_10_vgpr.
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: .set .Luse_50_vgpr.num_vgpr, 50
; GCN: .set .Luse_50_vgpr.num_agpr, 0
; GCN: .set .Luse_50_vgpr.numbered_sgpr, 32
; GCN: .set .Luse_50_vgpr.private_seg_size, 0
; GCN: .set .Luse_50_vgpr.uses_vcc, 0
; GCN: .set .Luse_50_vgpr.uses_flat_scratch, 0
; GCN: .set .Luse_50_vgpr.has_dyn_sized_stack, 0
; GCN: .set .Luse_50_vgpr.has_recursion, 0
; GCN: .set .Luse_50_vgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 36
; GCN: NumVgprs: 50
; GCN: ScratchSize: 0
; Clobbering v49 forces a VGPR count of 50 (v0..v49).
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: .set .Lindirect_use_50_vgpr.num_vgpr, max(41, .Luse_50_vgpr.num_vgpr)
; GCN: .set .Lindirect_use_50_vgpr.num_agpr, max(0, .Luse_50_vgpr.num_agpr)
; GCN: .set .Lindirect_use_50_vgpr.numbered_sgpr, max(34, .Luse_50_vgpr.numbered_sgpr)
; GCN: .set .Lindirect_use_50_vgpr.private_seg_size, 16+max(.Luse_50_vgpr.private_seg_size)
; GCN: .set .Lindirect_use_50_vgpr.uses_vcc, or(1, .Luse_50_vgpr.uses_vcc)
; GCN: .set .Lindirect_use_50_vgpr.uses_flat_scratch, or(0, .Luse_50_vgpr.uses_flat_scratch)
; GCN: .set .Lindirect_use_50_vgpr.has_dyn_sized_stack, or(0, .Luse_50_vgpr.has_dyn_sized_stack)
; GCN: .set .Lindirect_use_50_vgpr.has_recursion, or(0, .Luse_50_vgpr.has_recursion)
; GCN: .set .Lindirect_use_50_vgpr.has_indirect_call, or(0, .Luse_50_vgpr.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 50
; GCN: ScratchSize: 16
; Callee's 50 VGPRs dominate the caller's local 41 in the resolved NumVgprs.
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: .set .Luse_80_sgpr.num_vgpr, 0
; GCN: .set .Luse_80_sgpr.num_agpr, 0
; GCN: .set .Luse_80_sgpr.numbered_sgpr, 80
; GCN: .set .Luse_80_sgpr.private_seg_size, 0
; GCN: .set .Luse_80_sgpr.uses_vcc, 0
; GCN: .set .Luse_80_sgpr.uses_flat_scratch, 0
; GCN: .set .Luse_80_sgpr.has_dyn_sized_stack, 0
; GCN: .set .Luse_80_sgpr.has_recursion, 0
; GCN: .set .Luse_80_sgpr.has_indirect_call, 0
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 0
; GCN: ScratchSize: 0
; Clobbering s79 forces a numbered SGPR count of 80 (s0..s79).
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: .set .Lindirect_use_80_sgpr.num_vgpr, max(41, .Luse_80_sgpr.num_vgpr)
; GCN: .set .Lindirect_use_80_sgpr.num_agpr, max(0, .Luse_80_sgpr.num_agpr)
; GCN: .set .Lindirect_use_80_sgpr.numbered_sgpr, max(34, .Luse_80_sgpr.numbered_sgpr)
; GCN: .set .Lindirect_use_80_sgpr.private_seg_size, 16+max(.Luse_80_sgpr.private_seg_size)
; GCN: .set .Lindirect_use_80_sgpr.uses_vcc, or(1, .Luse_80_sgpr.uses_vcc)
; GCN: .set .Lindirect_use_80_sgpr.uses_flat_scratch, or(0, .Luse_80_sgpr.uses_flat_scratch)
; GCN: .set .Lindirect_use_80_sgpr.has_dyn_sized_stack, or(0, .Luse_80_sgpr.has_dyn_sized_stack)
; GCN: .set .Lindirect_use_80_sgpr.has_recursion, or(0, .Luse_80_sgpr.has_recursion)
; GCN: .set .Lindirect_use_80_sgpr.has_indirect_call, or(0, .Luse_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 84
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Callee's 80 numbered SGPRs dominate the caller's local 34 in the resolved totals.
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: .set .Lindirect_2_level_use_80_sgpr.num_vgpr, max(32, .Lindirect_use_80_sgpr.num_vgpr)
; GCN: .set .Lindirect_2_level_use_80_sgpr.num_agpr, max(0, .Lindirect_use_80_sgpr.num_agpr)
; GCN: .set .Lindirect_2_level_use_80_sgpr.numbered_sgpr, max(33, .Lindirect_use_80_sgpr.numbered_sgpr)
; GCN: .set .Lindirect_2_level_use_80_sgpr.private_seg_size, 0+max(.Lindirect_use_80_sgpr.private_seg_size)
; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_vcc, or(1, .Lindirect_use_80_sgpr.uses_vcc)
; GCN: .set .Lindirect_2_level_use_80_sgpr.uses_flat_scratch, or(1, .Lindirect_use_80_sgpr.uses_flat_scratch)
; GCN: .set .Lindirect_2_level_use_80_sgpr.has_dyn_sized_stack, or(0, .Lindirect_use_80_sgpr.has_dyn_sized_stack)
; GCN: .set .Lindirect_2_level_use_80_sgpr.has_recursion, or(0, .Lindirect_use_80_sgpr.has_recursion)
; GCN: .set .Lindirect_2_level_use_80_sgpr.has_indirect_call, or(0, .Lindirect_use_80_sgpr.has_indirect_call)
; GCN: TotalNumSgprs: 86
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
; Kernel entry two call levels above use_80_sgpr.
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_stack0:
; GCN: .set .Luse_stack0.num_vgpr, 1
; GCN: .set .Luse_stack0.num_agpr, 0
; GCN: .set .Luse_stack0.numbered_sgpr, 33
; GCN: .set .Luse_stack0.private_seg_size, 2052
; GCN: .set .Luse_stack0.uses_vcc, 0
; GCN: .set .Luse_stack0.uses_flat_scratch, 0
; GCN: .set .Luse_stack0.has_dyn_sized_stack, 0
; GCN: .set .Luse_stack0.has_recursion, 0
; GCN: .set .Luse_stack0.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 2052
; Static 512 x i32 alloca: constant private segment size of 2052.
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}use_stack1:
; GCN: .set .Luse_stack1.num_vgpr, 1
; GCN: .set .Luse_stack1.num_agpr, 0
; GCN: .set .Luse_stack1.numbered_sgpr, 33
; GCN: .set .Luse_stack1.private_seg_size, 404
; GCN: .set .Luse_stack1.uses_vcc, 0
; GCN: .set .Luse_stack1.uses_flat_scratch, 0
; GCN: .set .Luse_stack1.has_dyn_sized_stack, 0
; GCN: .set .Luse_stack1.has_recursion, 0
; GCN: .set .Luse_stack1.has_indirect_call, 0
; GCN: TotalNumSgprs: 37
; GCN: NumVgprs: 1
; GCN: ScratchSize: 404
; Smaller static alloca (100 x i32): constant private segment size of 404.
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: .set .Lindirect_use_stack.num_vgpr, max(41, .Luse_stack0.num_vgpr)
; GCN: .set .Lindirect_use_stack.num_agpr, max(0, .Luse_stack0.num_agpr)
; GCN: .set .Lindirect_use_stack.numbered_sgpr, max(34, .Luse_stack0.numbered_sgpr)
; GCN: .set .Lindirect_use_stack.private_seg_size, 80+max(.Luse_stack0.private_seg_size)
; GCN: .set .Lindirect_use_stack.uses_vcc, or(1, .Luse_stack0.uses_vcc)
; GCN: .set .Lindirect_use_stack.uses_flat_scratch, or(0, .Luse_stack0.uses_flat_scratch)
; GCN: .set .Lindirect_use_stack.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack)
; GCN: .set .Lindirect_use_stack.has_recursion, or(0, .Luse_stack0.has_recursion)
; GCN: .set .Lindirect_use_stack.has_indirect_call, or(0, .Luse_stack0.has_indirect_call)
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
; Local stack use (16 x i32) plus a call to use_stack0: local frame added to callee max.
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  call void @use_stack0()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: .set .Lindirect_2_level_use_stack.num_vgpr, max(32, .Lindirect_use_stack.num_vgpr)
; GCN: .set .Lindirect_2_level_use_stack.num_agpr, max(0, .Lindirect_use_stack.num_agpr)
; GCN: .set .Lindirect_2_level_use_stack.numbered_sgpr, max(33, .Lindirect_use_stack.numbered_sgpr)
; GCN: .set .Lindirect_2_level_use_stack.private_seg_size, 0+max(.Lindirect_use_stack.private_seg_size)
; GCN: .set .Lindirect_2_level_use_stack.uses_vcc, or(1, .Lindirect_use_stack.uses_vcc)
; GCN: .set .Lindirect_2_level_use_stack.uses_flat_scratch, or(1, .Lindirect_use_stack.uses_flat_scratch)
; GCN: .set .Lindirect_2_level_use_stack.has_dyn_sized_stack, or(0, .Lindirect_use_stack.has_dyn_sized_stack)
; GCN: .set .Lindirect_2_level_use_stack.has_recursion, or(0, .Lindirect_use_stack.has_recursion)
; GCN: .set .Lindirect_2_level_use_stack.has_indirect_call, or(0, .Lindirect_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2132
; Kernel entry two call levels above the stack users.
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}
|
|
|
|
|
|
; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: .set .Lmulti_call_use_use_stack.num_vgpr, max(41, .Luse_stack0.num_vgpr, .Luse_stack1.num_vgpr)
; GCN: .set .Lmulti_call_use_use_stack.num_agpr, max(0, .Luse_stack0.num_agpr, .Luse_stack1.num_agpr)
; GCN: .set .Lmulti_call_use_use_stack.numbered_sgpr, max(52, .Luse_stack0.numbered_sgpr, .Luse_stack1.numbered_sgpr)
; GCN: .set .Lmulti_call_use_use_stack.private_seg_size, 0+max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size)
; GCN: .set .Lmulti_call_use_use_stack.uses_vcc, or(1, .Luse_stack0.uses_vcc, .Luse_stack1.uses_vcc)
; GCN: .set .Lmulti_call_use_use_stack.uses_flat_scratch, or(1, .Luse_stack0.uses_flat_scratch, .Luse_stack1.uses_flat_scratch)
; GCN: .set .Lmulti_call_use_use_stack.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack, .Luse_stack1.has_dyn_sized_stack)
; GCN: .set .Lmulti_call_use_use_stack.has_recursion, or(0, .Luse_stack0.has_recursion, .Luse_stack1.has_recursion)
; GCN: .set .Lmulti_call_use_use_stack.has_indirect_call, or(0, .Luse_stack0.has_indirect_call, .Luse_stack1.has_indirect_call)
; GCN: TotalNumSgprs: 58
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2052
; Two distinct callees: each .set expression folds both callees' symbols.
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}
|
|
|
|
; External function with no visible body; its resource usage is unknown to the compiler.
declare void @external() #0
|
|
|
|
; GCN-LABEL: {{^}}multi_call_with_external:
; GCN: .set .Lmulti_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set .Lmulti_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set .Lmulti_call_with_external.numbered_sgpr, max(52, amdgpu.max_num_sgpr)
; GCN: .set .Lmulti_call_with_external.private_seg_size, 0+max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size)
; GCN: .set .Lmulti_call_with_external.uses_vcc, 1
; GCN: .set .Lmulti_call_with_external.uses_flat_scratch, 1
; GCN: .set .Lmulti_call_with_external.has_dyn_sized_stack, 1
; GCN: .set .Lmulti_call_with_external.has_recursion, 0
; GCN: .set .Lmulti_call_with_external.has_indirect_call, 1
; GCN: TotalNumSgprs: .Lmulti_call_with_external.numbered_sgpr+6
; GCN: NumVgprs: .Lmulti_call_with_external.num_vgpr
; GCN: ScratchSize: 2052
; Calling an external function makes register usage fall back to the module-wide
; amdgpu.max_num_* maximums and pessimizes the boolean flags.
define amdgpu_kernel void @multi_call_with_external() #0 {
  call void @use_stack0()
  call void @use_stack1()
  call void @external()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates:
; GCN: .set .Lmulti_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr)
; GCN: .set .Lmulti_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set .Lmulti_call_with_external_and_duplicates.numbered_sgpr, max(54, amdgpu.max_num_sgpr)
; GCN: .set .Lmulti_call_with_external_and_duplicates.private_seg_size, 0+max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size)
; GCN: .set .Lmulti_call_with_external_and_duplicates.uses_vcc, 1
; GCN: .set .Lmulti_call_with_external_and_duplicates.uses_flat_scratch, 1
; GCN: .set .Lmulti_call_with_external_and_duplicates.has_dyn_sized_stack, 1
; GCN: .set .Lmulti_call_with_external_and_duplicates.has_recursion, 0
; GCN: .set .Lmulti_call_with_external_and_duplicates.has_indirect_call, 1
; GCN: TotalNumSgprs: .Lmulti_call_with_external_and_duplicates.numbered_sgpr+6
; GCN: NumVgprs: .Lmulti_call_with_external_and_duplicates.num_vgpr
; GCN: ScratchSize: 2052
; Duplicate calls to the same callees must not duplicate entries in the .set expressions.
define amdgpu_kernel void @multi_call_with_external_and_duplicates() #0 {
  call void @use_stack0()
  call void @use_stack0()
  call void @use_stack1()
  call void @use_stack1()
  call void @external()
  call void @external()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}usage_external:
; GCN: .set .Lusage_external.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set .Lusage_external.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set .Lusage_external.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set .Lusage_external.private_seg_size, 0
; GCN: .set .Lusage_external.uses_vcc, 1
; GCN: .set .Lusage_external.uses_flat_scratch, 1
; GCN: .set .Lusage_external.has_dyn_sized_stack, 1
; GCN: .set .Lusage_external.has_recursion, 0
; GCN: .set .Lusage_external.has_indirect_call, 1
; GCN: TotalNumSgprs: .Lusage_external.numbered_sgpr+6
; GCN: NumVgprs: .Lusage_external.num_vgpr
; GCN: ScratchSize: 0
; Kernel that only calls an external function: register usage uses module maximums.
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}
|
|
|
|
; External function (attribute group #2); resource usage unknown to the compiler.
declare void @external_recurse() #2
|
|
|
|
; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: .set .Lusage_external_recurse.num_vgpr, max(32, amdgpu.max_num_vgpr)
; GCN: .set .Lusage_external_recurse.num_agpr, max(0, amdgpu.max_num_agpr)
; GCN: .set .Lusage_external_recurse.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; GCN: .set .Lusage_external_recurse.private_seg_size, 0
; GCN: .set .Lusage_external_recurse.uses_vcc, 1
; GCN: .set .Lusage_external_recurse.uses_flat_scratch, 1
; GCN: .set .Lusage_external_recurse.has_dyn_sized_stack, 1
; GCN: .set .Lusage_external_recurse.has_recursion, 1
; GCN: .set .Lusage_external_recurse.has_indirect_call, 1
; GCN: TotalNumSgprs: .Lusage_external_recurse.numbered_sgpr+6
; GCN: NumVgprs: .Lusage_external_recurse.num_vgpr
; GCN: ScratchSize: 0
; Same as usage_external, but the callee is declared with group #2, and has_recursion is 1 here.
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: .set .Ldirect_recursion_use_stack.num_vgpr, 41
; GCN: .set .Ldirect_recursion_use_stack.num_agpr, 0
; GCN: .set .Ldirect_recursion_use_stack.numbered_sgpr, 36
; GCN: .set .Ldirect_recursion_use_stack.private_seg_size, 2064
; GCN: .set .Ldirect_recursion_use_stack.uses_vcc, 1
; GCN: .set .Ldirect_recursion_use_stack.uses_flat_scratch, 0
; GCN: .set .Ldirect_recursion_use_stack.has_dyn_sized_stack, 0
; GCN: .set .Ldirect_recursion_use_stack.has_recursion, 1
; GCN: .set .Ldirect_recursion_use_stack.has_indirect_call, 0
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
; Self-recursive function: values resolve to constants (no symbolic self-reference)
; and has_recursion is set.
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: .set .Lusage_direct_recursion.num_vgpr, max(32, .Ldirect_recursion_use_stack.num_vgpr)
; GCN: .set .Lusage_direct_recursion.num_agpr, max(0, .Ldirect_recursion_use_stack.num_agpr)
; GCN: .set .Lusage_direct_recursion.numbered_sgpr, max(33, .Ldirect_recursion_use_stack.numbered_sgpr)
; GCN: .set .Lusage_direct_recursion.private_seg_size, 0+max(.Ldirect_recursion_use_stack.private_seg_size)
; GCN: .set .Lusage_direct_recursion.uses_vcc, or(1, .Ldirect_recursion_use_stack.uses_vcc)
; GCN: .set .Lusage_direct_recursion.uses_flat_scratch, or(1, .Ldirect_recursion_use_stack.uses_flat_scratch)
; GCN: .set .Lusage_direct_recursion.has_dyn_sized_stack, or(0, .Ldirect_recursion_use_stack.has_dyn_sized_stack)
; GCN: .set .Lusage_direct_recursion.has_recursion, or(1, .Ldirect_recursion_use_stack.has_recursion)
; GCN: .set .Lusage_direct_recursion.has_indirect_call, or(0, .Ldirect_recursion_use_stack.has_indirect_call)
; GCN: TotalNumSgprs: 42
; GCN: NumVgprs: 41
; GCN: ScratchSize: 2064
; Kernel calling the self-recursive function; usage chains via its .set symbols.
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}
|
|
|
|
; Mutually recursive pair: multi_stage_recurse2 references multi_stage_recurse1's
; symbols, while multi_stage_recurse1 breaks the cycle with fixed constants
; (e.g. max(48, 43)) so the symbolic expressions stay acyclic.
; GCN-LABEL: {{^}}multi_stage_recurse2:
; GCN: .set .Lmulti_stage_recurse2.num_vgpr, max(43, .Lmulti_stage_recurse1.num_vgpr)
; GCN: .set .Lmulti_stage_recurse2.num_agpr, max(0, .Lmulti_stage_recurse1.num_agpr)
; GCN: .set .Lmulti_stage_recurse2.numbered_sgpr, max(34, .Lmulti_stage_recurse1.numbered_sgpr)
; GCN: .set .Lmulti_stage_recurse2.private_seg_size, 16+max(.Lmulti_stage_recurse1.private_seg_size)
; GCN: .set .Lmulti_stage_recurse2.uses_vcc, or(1, .Lmulti_stage_recurse1.uses_vcc)
; GCN: .set .Lmulti_stage_recurse2.uses_flat_scratch, or(0, .Lmulti_stage_recurse1.uses_flat_scratch)
; GCN: .set .Lmulti_stage_recurse2.has_dyn_sized_stack, or(0, .Lmulti_stage_recurse1.has_dyn_sized_stack)
; GCN: .set .Lmulti_stage_recurse2.has_recursion, or(1, .Lmulti_stage_recurse1.has_recursion)
; GCN: .set .Lmulti_stage_recurse2.has_indirect_call, or(0, .Lmulti_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: .Lmulti_stage_recurse2.numbered_sgpr+extrasgprs(.Lmulti_stage_recurse2.uses_vcc, .Lmulti_stage_recurse2.uses_flat_scratch, 1)
; GCN: NumVgprs: max(43, .Lmulti_stage_recurse1.num_vgpr)
; GCN: ScratchSize: 16+max(.Lmulti_stage_recurse1.private_seg_size)
; GCN-LABEL: {{^}}multi_stage_recurse1:
; GCN: .set .Lmulti_stage_recurse1.num_vgpr, max(48, 43)
; GCN: .set .Lmulti_stage_recurse1.num_agpr, max(0, 0)
; GCN: .set .Lmulti_stage_recurse1.numbered_sgpr, max(34, 34)
; GCN: .set .Lmulti_stage_recurse1.private_seg_size, 16
; GCN: .set .Lmulti_stage_recurse1.uses_vcc, 1
; GCN: .set .Lmulti_stage_recurse1.uses_flat_scratch, 0
; GCN: .set .Lmulti_stage_recurse1.has_dyn_sized_stack, 0
; GCN: .set .Lmulti_stage_recurse1.has_recursion, 1
; GCN: .set .Lmulti_stage_recurse1.has_indirect_call, 0
; GCN: TotalNumSgprs: 38
; GCN: NumVgprs: 48
; GCN: ScratchSize: 16
define void @multi_stage_recurse1(i32 %val) #2 {
  call void @multi_stage_recurse2(i32 %val)
  call void asm sideeffect "", "~{v47}"() #0
  ret void
}

define void @multi_stage_recurse2(i32 %val) #2 {
  call void @multi_stage_recurse1(i32 %val)
  call void asm sideeffect "", "~{v42}"() #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}usage_multi_stage_recurse:
; GCN: .set .Lusage_multi_stage_recurse.num_vgpr, max(32, .Lmulti_stage_recurse1.num_vgpr)
; GCN: .set .Lusage_multi_stage_recurse.num_agpr, max(0, .Lmulti_stage_recurse1.num_agpr)
; GCN: .set .Lusage_multi_stage_recurse.numbered_sgpr, max(33, .Lmulti_stage_recurse1.numbered_sgpr)
; GCN: .set .Lusage_multi_stage_recurse.private_seg_size, 0+max(.Lmulti_stage_recurse1.private_seg_size)
; GCN: .set .Lusage_multi_stage_recurse.uses_vcc, or(1, .Lmulti_stage_recurse1.uses_vcc)
; GCN: .set .Lusage_multi_stage_recurse.uses_flat_scratch, or(1, .Lmulti_stage_recurse1.uses_flat_scratch)
; GCN: .set .Lusage_multi_stage_recurse.has_dyn_sized_stack, or(0, .Lmulti_stage_recurse1.has_dyn_sized_stack)
; GCN: .set .Lusage_multi_stage_recurse.has_recursion, or(1, .Lmulti_stage_recurse1.has_recursion)
; GCN: .set .Lusage_multi_stage_recurse.has_indirect_call, or(0, .Lmulti_stage_recurse1.has_indirect_call)
; GCN: TotalNumSgprs: 40
; GCN: NumVgprs: 48
; GCN: ScratchSize: 16
; Kernel calling into the mutually recursive pair via multi_stage_recurse1.
define amdgpu_kernel void @usage_multi_stage_recurse(i32 %n) #0 {
  call void @multi_stage_recurse1(i32 %n)
  ret void
}
|
|
|
|
; Mutually recursive pair without explicit recursion-related attributes (group #0);
; noattr1 again breaks the cycle with fixed constants (e.g. max(57, 54)).
; GCN-LABEL: {{^}}multi_stage_recurse_noattr2:
; GCN: .set .Lmulti_stage_recurse_noattr2.num_vgpr, max(41, .Lmulti_stage_recurse_noattr1.num_vgpr)
; GCN: .set .Lmulti_stage_recurse_noattr2.num_agpr, max(0, .Lmulti_stage_recurse_noattr1.num_agpr)
; GCN: .set .Lmulti_stage_recurse_noattr2.numbered_sgpr, max(54, .Lmulti_stage_recurse_noattr1.numbered_sgpr)
; GCN: .set .Lmulti_stage_recurse_noattr2.private_seg_size, 16+max(.Lmulti_stage_recurse_noattr1.private_seg_size)
; GCN: .set .Lmulti_stage_recurse_noattr2.uses_vcc, or(1, .Lmulti_stage_recurse_noattr1.uses_vcc)
; GCN: .set .Lmulti_stage_recurse_noattr2.uses_flat_scratch, or(0, .Lmulti_stage_recurse_noattr1.uses_flat_scratch)
; GCN: .set .Lmulti_stage_recurse_noattr2.has_dyn_sized_stack, or(0, .Lmulti_stage_recurse_noattr1.has_dyn_sized_stack)
; GCN: .set .Lmulti_stage_recurse_noattr2.has_recursion, or(0, .Lmulti_stage_recurse_noattr1.has_recursion)
; GCN: .set .Lmulti_stage_recurse_noattr2.has_indirect_call, or(0, .Lmulti_stage_recurse_noattr1.has_indirect_call)
; GCN: TotalNumSgprs: .Lmulti_stage_recurse_noattr2.numbered_sgpr+extrasgprs(.Lmulti_stage_recurse_noattr2.uses_vcc, .Lmulti_stage_recurse_noattr2.uses_flat_scratch, 1)
; GCN: NumVgprs: max(41, .Lmulti_stage_recurse_noattr1.num_vgpr)
; GCN: ScratchSize: 16+max(.Lmulti_stage_recurse_noattr1.private_seg_size)
; GCN-LABEL: {{^}}multi_stage_recurse_noattr1:
; GCN: .set .Lmulti_stage_recurse_noattr1.num_vgpr, max(41, 41)
; GCN: .set .Lmulti_stage_recurse_noattr1.num_agpr, max(0, 0)
; GCN: .set .Lmulti_stage_recurse_noattr1.numbered_sgpr, max(57, 54)
; GCN: .set .Lmulti_stage_recurse_noattr1.private_seg_size, 16
; GCN: .set .Lmulti_stage_recurse_noattr1.uses_vcc, 1
; GCN: .set .Lmulti_stage_recurse_noattr1.uses_flat_scratch, 0
; GCN: .set .Lmulti_stage_recurse_noattr1.has_dyn_sized_stack, 0
; GCN: .set .Lmulti_stage_recurse_noattr1.has_recursion, 0
; GCN: .set .Lmulti_stage_recurse_noattr1.has_indirect_call, 0
; GCN: TotalNumSgprs: 61
; GCN: NumVgprs: 41
; GCN: ScratchSize: 16
define void @multi_stage_recurse_noattr1(i32 %val) #0 {
  call void @multi_stage_recurse_noattr2(i32 %val)
  call void asm sideeffect "", "~{s56}"() #0
  ret void
}

define void @multi_stage_recurse_noattr2(i32 %val) #0 {
  call void @multi_stage_recurse_noattr1(i32 %val)
  call void asm sideeffect "", "~{s53}"() #0
  ret void
}
|
|
|
|
; GCN-LABEL: {{^}}usage_multi_stage_recurse_noattrs:
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.num_vgpr, max(32, .Lmulti_stage_recurse_noattr1.num_vgpr)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.num_agpr, max(0, .Lmulti_stage_recurse_noattr1.num_agpr)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.numbered_sgpr, max(33, .Lmulti_stage_recurse_noattr1.numbered_sgpr)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.private_seg_size, 0+max(.Lmulti_stage_recurse_noattr1.private_seg_size)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.uses_vcc, or(1, .Lmulti_stage_recurse_noattr1.uses_vcc)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.uses_flat_scratch, or(1, .Lmulti_stage_recurse_noattr1.uses_flat_scratch)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.has_dyn_sized_stack, or(0, .Lmulti_stage_recurse_noattr1.has_dyn_sized_stack)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.has_recursion, or(0, .Lmulti_stage_recurse_noattr1.has_recursion)
|
|
; GCN: .set .Lusage_multi_stage_recurse_noattrs.has_indirect_call, or(0, .Lmulti_stage_recurse_noattr1.has_indirect_call)
|
|
; GCN: TotalNumSgprs: 63
|
|
; GCN: NumVgprs: 41
|
|
; GCN: ScratchSize: 16
|
|
; Kernel entry point into the mutually recursive noattr pair. The checks
; above expect its resource usage to be expressed symbolically in terms of
; .Lmulti_stage_recurse_noattr1's .set symbols (max/or propagation from the
; single callee).
define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 {
|
|
call void @multi_stage_recurse_noattr1(i32 %n)
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse:
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.num_vgpr, max(41, .Luse_stack0.num_vgpr, .Luse_stack1.num_vgpr, .Lmulti_stage_recurse1.num_vgpr)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.num_agpr, max(0, .Luse_stack0.num_agpr, .Luse_stack1.num_agpr, .Lmulti_stage_recurse1.num_agpr)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.numbered_sgpr, max(53, .Luse_stack0.numbered_sgpr, .Luse_stack1.numbered_sgpr, .Lmulti_stage_recurse1.numbered_sgpr)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.private_seg_size, 0+max(.Luse_stack0.private_seg_size, .Luse_stack1.private_seg_size, .Lmulti_stage_recurse1.private_seg_size)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.uses_vcc, or(1, .Luse_stack0.uses_vcc, .Luse_stack1.uses_vcc, .Lmulti_stage_recurse1.uses_vcc)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.uses_flat_scratch, or(1, .Luse_stack0.uses_flat_scratch, .Luse_stack1.uses_flat_scratch, .Lmulti_stage_recurse1.uses_flat_scratch)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.has_dyn_sized_stack, or(0, .Luse_stack0.has_dyn_sized_stack, .Luse_stack1.has_dyn_sized_stack, .Lmulti_stage_recurse1.has_dyn_sized_stack)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.has_recursion, or(1, .Luse_stack0.has_recursion, .Luse_stack1.has_recursion, .Lmulti_stage_recurse1.has_recursion)
|
|
; GCN: .set .Lmulti_call_with_multi_stage_recurse.has_indirect_call, or(0, .Luse_stack0.has_indirect_call, .Luse_stack1.has_indirect_call, .Lmulti_stage_recurse1.has_indirect_call)
|
|
; GCN: TotalNumSgprs: 59
|
|
; GCN: NumVgprs: 48
|
|
; GCN: ScratchSize: 2052
|
|
; Kernel combining multiple non-recursive callees with a recursive one
; (@use_stack0, @use_stack1 and @multi_stage_recurse1 are defined earlier in
; this file). The checks above expect each propagated quantity to fold all
; three callees' symbols into one max()/or() expression, and has_recursion
; to be forced to 1 at this call site.
define amdgpu_kernel void @multi_call_with_multi_stage_recurse(i32 %n) #0 {
|
|
call void @use_stack0()
|
|
call void @use_stack1()
|
|
call void @multi_stage_recurse1(i32 %n)
|
|
ret void
|
|
}
|
|
|
|
; Make sure there's no assert when a sgpr96 is used.
|
|
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.private_seg_size, 0
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.uses_vcc, 1
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.uses_flat_scratch, 1
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.has_dyn_sized_stack, 1
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.has_recursion, 0
|
|
; GCN: .set .Lcount_use_sgpr96_external_call.has_indirect_call, 1
|
|
; GCN: TotalNumSgprs: .Lcount_use_sgpr96_external_call.numbered_sgpr+6
|
|
; GCN: NumVgprs: .Lcount_use_sgpr96_external_call.num_vgpr
|
|
; GCN: ScratchSize: 0
|
|
; Regression test: an sgpr96 tuple (3 x i32 in "s" constraint) fed to inline
; asm must not assert in the register-usage counting code. The call to the
; external function makes the usage unknowable, so the checks above expect
; the module-level amdgpu.max_num_* symbols in the .set expressions and
; has_indirect_call/has_dyn_sized_stack forced to 1.
define amdgpu_kernel void @count_use_sgpr96_external_call() {
|
|
entry:
|
|
tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
|
|
call void @external()
|
|
ret void
|
|
}
|
|
|
|
; Make sure there's no assert when a sgpr160 is used.
|
|
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.private_seg_size, 0
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.uses_vcc, 1
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.uses_flat_scratch, 1
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.has_dyn_sized_stack, 1
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.has_recursion, 0
|
|
; GCN: .set .Lcount_use_sgpr160_external_call.has_indirect_call, 1
|
|
; GCN: TotalNumSgprs: .Lcount_use_sgpr160_external_call.numbered_sgpr+6
|
|
; GCN: NumVgprs: .Lcount_use_sgpr160_external_call.num_vgpr
|
|
; GCN: ScratchSize: 0
|
|
; Regression test: an sgpr160 tuple (5 x i32 in "s" constraint) must not
; assert in register-usage counting. As with the sgpr96 variant, the
; external call makes usage unknowable, so the .set expressions above fall
; back to the module-level amdgpu.max_num_* maxima.
define amdgpu_kernel void @count_use_sgpr160_external_call() {
|
|
entry:
|
|
tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
|
|
call void @external()
|
|
ret void
|
|
}
|
|
|
|
; Make sure there's no assert when a vgpr160 is used.
|
|
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.num_vgpr, max(32, amdgpu.max_num_vgpr)
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.num_agpr, max(0, amdgpu.max_num_agpr)
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.private_seg_size, 0
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.uses_vcc, 1
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.uses_flat_scratch, 1
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.has_dyn_sized_stack, 1
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.has_recursion, 0
|
|
; GCN: .set .Lcount_use_vgpr160_external_call.has_indirect_call, 1
|
|
; GCN: TotalNumSgprs: .Lcount_use_vgpr160_external_call.numbered_sgpr+6
|
|
; GCN: NumVgprs: .Lcount_use_vgpr160_external_call.num_vgpr
|
|
; GCN: ScratchSize: 0
|
|
; Regression test: a vgpr160 tuple (5 x i32 in "v" constraint) must not
; assert in register-usage counting -- the VGPR-side counterpart of the
; sgpr160 case above, likewise resolving to the module-level maxima because
; of the external call.
define amdgpu_kernel void @count_use_vgpr160_external_call() {
|
|
entry:
|
|
tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
|
|
call void @external()
|
|
ret void
|
|
}
|
|
|
|
; Test that amdgcn.device.init correctly references amdgpu.max_num_named_barrier
|
|
; GCN-LABEL: {{^}}amdgcn.device.init:
|
|
; GCN: .set .Lamdgcn.device.init.num_named_barrier, max(0, amdgpu.max_num_named_barrier)
|
|
|
|
; Registering a constructor causes the backend to synthesize the
; amdgcn.device.init kernel, whose named-barrier usage is checked above
; against amdgpu.max_num_named_barrier.
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @ctor_func, ptr null }]
|
|
|
|
; Trivial constructor body; only its registration in @llvm.global_ctors
; matters (it triggers emission of the amdgcn.device.init kernel).
define internal void @ctor_func() {
|
|
ret void
|
|
}
|
|
|
|
; Added at the end of the .s file are the module level maximums
|
|
; GCN: .set amdgpu.max_num_vgpr, 50
|
|
; GCN: .set amdgpu.max_num_agpr, 0
|
|
; GCN: .set amdgpu.max_num_sgpr, 80
|
|
; GCN: .set amdgpu.max_num_named_barrier, 0
|
|
|
|
; NOTE(review): #0 and #1 are identical attribute sets; kept separate so
; test functions can be regrouped independently -- do not merge without
; updating every #0/#1 reference above. #2 drops norecurse.
attributes #0 = { nounwind noinline norecurse }
|
|
attributes #1 = { nounwind noinline norecurse }
|
|
attributes #2 = { nounwind noinline }
|