The LocalStackSlotAllocation pass disallows negative offsets with respect to a base register, so it ends up introducing a new register for such frame references. This patch makes LocalStackSlotAllocation additionally consider the immediate offset of an instruction when sorting frame references, thereby avoiding negative offsets and maximizing reuse of the existing registers.
28 lines
1.5 KiB
YAML
28 lines
1.5 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=localstackalloc -o - %s | FileCheck %s
#
# Test for the localstackalloc pass on gfx950 (function name suggests it tracks
# issue 155902 - confirm against the bug tracker).
#
# Input: two scratch stores to %stack.1 with immediate offsets 8 and 0 (in that
# order), followed by one scratch store to %stack.2 with immediate offset 0.
#
# Expected: a single base register is materialized from %stack.1 (S_MOV_B32)
# and all three stores use it, with immediate offsets 8, 0 and 16. No second
# base register is created, and no store needs a negative offset.
|
|
|
|
---
|
|
name: issue155902
|
|
stack:
|
|
- { id: 0, offset: 0, size: 16384, alignment: 4 }
|
|
- { id: 1, offset: 0, size: 16, alignment: 8 }
|
|
- { id: 2, offset: 0, size: 8, alignment: 8 }
|
|
body: |
|
|
bb.0:
|
|
|
|
; CHECK-LABEL: name: issue155902
|
|
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xexec_hi = S_MOV_B32 %stack.1
|
|
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
|
|
; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 8, 0, implicit $exec, implicit $flat_scr
|
|
; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR [[V_MOV_B]], [[S_MOV_B32_]], 0, 0, implicit $exec, implicit $flat_scr
|
|
; CHECK-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
|
|
; CHECK-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed [[V_MOV_B1]], [[S_MOV_B32_]], 16, 0, implicit $exec, implicit $flat_scr
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
%0:vreg_64_align2 = V_MOV_B64_PSEUDO 0, implicit $exec
|
|
SCRATCH_STORE_DWORDX2_SADDR %0, %stack.1, 8, 0, implicit $exec, implicit $flat_scr
|
|
SCRATCH_STORE_DWORDX2_SADDR %0, %stack.1, 0, 0, implicit $exec, implicit $flat_scr
|
|
%1:vreg_64_align2 = V_MOV_B64_PSEUDO 900, implicit $exec
|
|
SCRATCH_STORE_DWORDX2_SADDR killed %1, %stack.2, 0, 0, implicit $exec, implicit $flat_scr
|
|
S_ENDPGM 0
|
|
...
|