; Checks the alignment directive emitted for non-entry functions on several
; AMDGPU architectures. The preferred alignment follows the instruction cache
; line size of the selected subtarget:
;
;   GFX9  - cache line = 64B  (.p2align 6)
;   GFX10 - cache line = 64B  (.p2align 6)
;   GFX11 - cache line = 128B (.p2align 7)
;   GFX12 - cache line = 128B (.p2align 7)

; --- Default (cache line alignment) ---
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s

; --- Optsize: alignment drops to minimum (Align(4) = .p2align 2) ---
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=OPTSIZE %s

; --- IR align attribute: ensureAlignment must not lower explicit alignment ---
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=EXPLICIT-ALIGN %s

; --- -align-all-functions=1 with optsize: verify floor at Align(4) ---
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -align-all-functions=1 < %s | FileCheck -check-prefix=ALIGN-ALL %s

; --- prefalign attribute: overrides target preferred alignment ---
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=PREFALIGN %s

; --- Entry function: 256B alignment unchanged ---
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=ENTRY %s

; Plain non-entry function with no attributes: the emitted alignment must
; equal the instruction cache line size of the subtarget under test.
define void @non_entry_func() {
; GFX9: .p2align 6{{$}}
; GFX9: non_entry_func:
; GFX10: .p2align 6{{$}}
; GFX10: non_entry_func:
; GFX11: .p2align 7{{$}}
; GFX11: non_entry_func:
; GFX12: .p2align 7{{$}}
; GFX12: non_entry_func:
  ret void
}

; Non-entry function marked optsize: the alignment may shrink, but never
; below Align(4).
define void @optsize_func() optsize {
; OPTSIZE: .globl optsize_func
; OPTSIZE-NEXT: .p2align 2{{$}}
  ret void
}

; Non-entry function carrying an explicit IR "align 128". The GFX9 default is
; 64 (one cache line), and ensureAlignment must never lower an explicit
; alignment, so the 128 requested in the IR has to survive.
define void @explicit_align_func() align 128 {
; EXPLICIT-ALIGN: .globl explicit_align_func
; EXPLICIT-ALIGN-NEXT: .p2align 7{{$}}
  ret void
}

; Non-entry function with an explicit IR "align 32" on gfx900. That is below
; the preferred alignment (64), so the preferred alignment wins.
; Result: .p2align 6.
define void @low_align_func() align 32 {
; GFX9: .globl low_align_func
; GFX9-NEXT: .p2align 6{{$}}
  ret void
}

; optsize combined with -align-all-functions=1. MachineFunction::init sets
; Align(2), but ensureAlignment(4) in AsmPrinter restores the floor. Under
; optsize, getPreferredAlignment returns max(Align(1), Align(4)) = Align(4).
define void @align_all_optsize_func() optsize {
; ALIGN-ALL: .globl align_all_optsize_func
; ALIGN-ALL-NEXT: .p2align 2{{$}}
  ret void
}

; prefalign(16) on gfx900 replaces the target preferred alignment (64) with
; 16: getPreferredAlignment takes the prefalign value directly rather than
; calling getPrefFunctionAlignment.
; Result: max(16, 4) = 16 -> .p2align 4.
define void @prefalign_low_func() prefalign(16) {
; PREFALIGN: .globl prefalign_low_func
; PREFALIGN-NEXT: .p2align 4{{$}}
  ret void
}

; prefalign(256) on gfx900, i.e. above the target preferred alignment (64).
; Result: max(256, 4) = 256 -> .p2align 8.
define void @prefalign_high_func() prefalign(256) {
; PREFALIGN: .globl prefalign_high_func
; PREFALIGN-NEXT: .p2align 8{{$}}
  ret void
}

; prefalign(2) on gfx900, i.e. below the 4-byte instruction alignment floor.
; ensureAlignment(4) in AsmPrinter guarantees the minimum.
; Result: max(2, 4) = 4 -> .p2align 2.
define void @prefalign_floor_func() prefalign(2) {
; PREFALIGN: .globl prefalign_floor_func
; PREFALIGN-NEXT: .p2align 2{{$}}
  ret void
}

; Entry function (kernel): its 256B alignment must stay as it is, independent
; of the non-entry alignment rules exercised in this file.
define amdgpu_kernel void @entry_func() {
; The expected directive is .p2align 8, i.e. 2^8 = 256 bytes.
; ENTRY: .globl entry_func
; ENTRY-NEXT: .p2align 8{{$}}
  ret void
}