Files
llvm-project/llvm/test/CodeGen/AMDGPU/ds-read2-write2-debug-info.ll
Matt Arsenault e5e74e9877 AMDGPU: Use getMergedLocation in SILoadStoreOptimizer (#156396)
This is merging loads and stores so use the combined DebugLoc.

Not sure if computeBase should be using the merged location from
all the involved instructions. I'm also not sure how to test this
sort of thing.
2025-11-10 15:04:24 -08:00

90 lines
4.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=debugify < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
; 512-element float array in address space 3; both kernels below index into it
; with the work-item id and with the work-item id plus 8.
@lds = addrspace(3) global [512 x float] poison, align 4
; Store-merging case: each work-item loads one float from %in[workitem.id.x]
; and stores that same value to @lds[x] and @lds[x + 8]. %C is unused.
; The assertions expect the two stores to come out as one ds_write2_b32 with
; offset1:8, attributed to line 0 (".loc 1 0 0 is_stmt 0") rather than to the
; debugify line of either individual store.
define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CHECK-LABEL: simple_write2_one_val_f32:
; CHECK: .Lfunc_begin0:
; CHECK-NEXT: .cfi_sections .debug_frame
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: .file 1 "/" "<stdin>"
; CHECK-NEXT: .loc 1 1 1 prologue_end ; <stdin>:1:1
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:1 <- $vgpr0
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:5 <- [DW_OP_plus_uconst 8, DW_OP_stack_value] $vgpr0
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:3 <- undef
; CHECK-NEXT: .loc 1 2 1 ; <stdin>:2:1
; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:4 <- $vgpr0
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:2 <- undef
; CHECK-NEXT: .loc 1 3 1 ; <stdin>:3:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: global_load_dword v1, v0, s[0:1]
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: ;DEBUG_VALUE: simple_write2_one_val_f32:6 <- [DW_OP_plus_uconst 32, DW_OP_stack_value] $vgpr0
; CHECK-NEXT: .loc 1 0 0 is_stmt 0 ; <stdin>:0
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: ds_write2_b32 v0, v1, v1 offset1:8
; CHECK-NEXT: .loc 1 9 1 is_stmt 1 ; <stdin>:9:1
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .Ltmp3:
  ; The intrinsic call carries no attribute-group suffix: this file defines
  ; only group #0.
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Value to be written: %in[x].
  %in.gep = getelementptr float, ptr addrspace(1) %in, i32 %x.i
  %val = load float, ptr addrspace(1) %in.gep, align 4
  ; First store: @lds[x].
  %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
  store float %val, ptr addrspace(3) %arrayidx0, align 4
  ; Second store: @lds[x + 8], a constant element distance of 8 from the first.
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x
  store float %val, ptr addrspace(3) %arrayidx1, align 4
  ret void
}
; Load-merging case: each work-item loads @lds[x] and @lds[x + 8], adds the two
; floats, and stores the sum to %out[workitem.id.x].
; The assertions expect the two loads to come out as one ds_read2_b32 with
; offset1:8, attributed to line 0 (".loc 1 0 0 is_stmt 0") rather than to the
; debugify line of either individual load.
define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 {
; CHECK-LABEL: simple_read2_f32:
; CHECK: .Lfunc_begin1:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: .loc 1 11 1 prologue_end ; <stdin>:11:1
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; CHECK-NEXT: .Ltmp4:
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:8 <- $vgpr2
; CHECK-NEXT: .loc 1 0 0 is_stmt 0 ; <stdin>:0
; CHECK-NEXT: ds_read2_b32 v[0:1], v2 offset1:8
; CHECK-NEXT: .Ltmp5:
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:9 <- undef
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:11 <- [DW_OP_plus_uconst 32, DW_OP_stack_value] $vgpr2
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:12 <- undef
; CHECK-NEXT: .loc 1 10 1 is_stmt 1 ; <stdin>:10:1
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-NEXT: .Ltmp6:
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:7 <- undef
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:10 <- [DW_OP_plus_uconst 8, DW_OP_stack_value] undef
; CHECK-NEXT: .loc 1 16 1 ; <stdin>:16:1
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_add_f32_e32 v0, v0, v1
; CHECK-NEXT: .Ltmp7:
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:13 <- $vgpr0
; CHECK-NEXT: ;DEBUG_VALUE: simple_read2_f32:14 <- undef
; CHECK-NEXT: .loc 1 18 1 ; <stdin>:18:1
; CHECK-NEXT: global_store_dword v2, v0, s[0:1]
; CHECK-NEXT: .loc 1 19 1 ; <stdin>:19:1
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: .Ltmp8:
  ; The intrinsic call carries no attribute-group suffix: this file defines
  ; only group #0.
  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; First load: @lds[x].
  %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %x.i
  %val0 = load float, ptr addrspace(3) %arrayidx0, align 4
  ; Second load: @lds[x + 8], a constant element distance of 8 from the first.
  %add.x = add nsw i32 %x.i, 8
  %arrayidx1 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %add.x
  %val1 = load float, ptr addrspace(3) %arrayidx1, align 4
  ; Combine both loaded values and write the result to %out[x].
  %sum = fadd float %val0, %val1
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i32 %x.i
  store float %sum, ptr addrspace(1) %out.gep, align 4
  ret void
}
; Attribute group applied to both kernel definitions in this file.
attributes #0 = { nounwind }