[AMDGPU][NFC] Use LaneMaskConstants for waterfall loops in AMDGPURegBankLegalizeHelper (#190792)

Use `LaneMaskConstants` for generating waterfall loops in
`AMDGPURegBankLegalizeHelper`.
No functional change.
This commit is contained in:
gretay-amd
2026-04-30 11:24:49 +01:00
committed by GitHub
parent f7d40320a0
commit a8eb65a909

View File

@@ -14,6 +14,7 @@
#include "AMDGPURegBankLegalizeHelper.h" #include "AMDGPURegBankLegalizeHelper.h"
#include "AMDGPUGlobalISelUtils.h" #include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h" #include "AMDGPUInstrInfo.h"
#include "AMDGPULaneMaskUtils.h"
#include "AMDGPURegBankLegalizeRules.h" #include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPURegisterBankInfo.h" #include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h" #include "GCNSubtarget.h"
@@ -95,20 +96,7 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIRegisterInfo *TRI = ST.getRegisterInfo();
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass(); const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg; const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);
if (IsWave32) {
MovExecOpc = AMDGPU::S_MOV_B32;
MovExecTermOpc = AMDGPU::S_MOV_B32_term;
XorTermOpc = AMDGPU::S_XOR_B32_term;
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
ExecReg = AMDGPU::EXEC_LO;
} else {
MovExecOpc = AMDGPU::S_MOV_B64;
MovExecTermOpc = AMDGPU::S_MOV_B64_term;
XorTermOpc = AMDGPU::S_XOR_B64_term;
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
ExecReg = AMDGPU::EXEC;
}
#ifndef NDEBUG #ifndef NDEBUG
const int OrigRangeSize = std::distance(BeginIt, EndIt); const int OrigRangeSize = std::distance(BeginIt, EndIt);
@@ -270,7 +258,7 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg); B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
// Update EXEC, save the original EXEC value to SavedExec. // Update EXEC, save the original EXEC value to SavedExec.
B.buildInstr(AndSaveExecOpc) B.buildInstr(LMC.AndSaveExecOpc)
.addDef(SavedExec) .addDef(SavedExec)
.addReg(CondRegLM, RegState::Kill); .addReg(CondRegLM, RegState::Kill);
MRI.setSimpleHint(SavedExec, CondRegLM); MRI.setSimpleHint(SavedExec, CondRegLM);
@@ -278,7 +266,10 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
B.setInsertPt(*BodyBB, BodyBB->end()); B.setInsertPt(*BodyBB, BodyBB->end());
// Update EXEC, switch all done bits to 0 and all todo bits to 1. // Update EXEC, switch all done bits to 0 and all todo bits to 1.
B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec); B.buildInstr(LMC.XorTermOpc)
.addDef(LMC.ExecReg)
.addReg(LMC.ExecReg)
.addReg(SavedExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0? // s_cbranch_scc0?
@@ -288,11 +279,11 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
// Save the EXEC mask before the loop. // Save the EXEC mask before the loop.
B.setInsertPt(MBB, MBB.end()); B.setInsertPt(MBB, MBB.end());
B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg); B.buildInstr(LMC.MovOpc).addDef(SaveExecReg).addReg(LMC.ExecReg);
// Restore the EXEC mask after the loop. // Restore the EXEC mask after the loop.
B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin()); B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg); B.buildInstr(LMC.MovTermOpc).addDef(LMC.ExecReg).addReg(SaveExecReg);
// Set the insert point after the original instruction, so any new // Set the insert point after the original instruction, so any new
// instructions will be in the remainder. // instructions will be in the remainder.