[AMDGPU][NFC] Use LaneMaskConstants for waterfall loops in AMDGPURegBankLegalizeHelper (#190792)
Use `LaneMaskConstants` to generate waterfall loops in `AMDGPURegBankLegalizeHelper`. No functional change.
This commit is contained in:
@@ -14,6 +14,7 @@
|
||||
#include "AMDGPURegBankLegalizeHelper.h"
|
||||
#include "AMDGPUGlobalISelUtils.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPULaneMaskUtils.h"
|
||||
#include "AMDGPURegBankLegalizeRules.h"
|
||||
#include "AMDGPURegisterBankInfo.h"
|
||||
#include "GCNSubtarget.h"
|
||||
@@ -95,20 +96,7 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
||||
|
||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
|
||||
unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
|
||||
if (IsWave32) {
|
||||
MovExecOpc = AMDGPU::S_MOV_B32;
|
||||
MovExecTermOpc = AMDGPU::S_MOV_B32_term;
|
||||
XorTermOpc = AMDGPU::S_XOR_B32_term;
|
||||
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
|
||||
ExecReg = AMDGPU::EXEC_LO;
|
||||
} else {
|
||||
MovExecOpc = AMDGPU::S_MOV_B64;
|
||||
MovExecTermOpc = AMDGPU::S_MOV_B64_term;
|
||||
XorTermOpc = AMDGPU::S_XOR_B64_term;
|
||||
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
|
||||
ExecReg = AMDGPU::EXEC;
|
||||
}
|
||||
const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);
|
||||
|
||||
#ifndef NDEBUG
|
||||
const int OrigRangeSize = std::distance(BeginIt, EndIt);
|
||||
@@ -270,7 +258,7 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
||||
B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
|
||||
|
||||
// Update EXEC, save the original EXEC value to SavedExec.
|
||||
B.buildInstr(AndSaveExecOpc)
|
||||
B.buildInstr(LMC.AndSaveExecOpc)
|
||||
.addDef(SavedExec)
|
||||
.addReg(CondRegLM, RegState::Kill);
|
||||
MRI.setSimpleHint(SavedExec, CondRegLM);
|
||||
@@ -278,7 +266,10 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
||||
B.setInsertPt(*BodyBB, BodyBB->end());
|
||||
|
||||
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
|
||||
B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);
|
||||
B.buildInstr(LMC.XorTermOpc)
|
||||
.addDef(LMC.ExecReg)
|
||||
.addReg(LMC.ExecReg)
|
||||
.addReg(SavedExec);
|
||||
|
||||
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
|
||||
// s_cbranch_scc0?
|
||||
@@ -288,11 +279,11 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
||||
|
||||
// Save the EXEC mask before the loop.
|
||||
B.setInsertPt(MBB, MBB.end());
|
||||
B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);
|
||||
B.buildInstr(LMC.MovOpc).addDef(SaveExecReg).addReg(LMC.ExecReg);
|
||||
|
||||
// Restore the EXEC mask after the loop.
|
||||
B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
|
||||
B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);
|
||||
B.buildInstr(LMC.MovTermOpc).addDef(LMC.ExecReg).addReg(SaveExecReg);
|
||||
|
||||
// Set the insert point after the original instruction, so any new
|
||||
// instructions will be in the remainder.
|
||||
|
||||
Reference in New Issue
Block a user