[AMDGPU][NFC] Use LaneMaskConstants for waterfall loops in AMDGPURegBankLegalizeHelper (#190792)
Use `LaneMaskConstants` to generate waterfall loops in `AMDGPURegBankLegalizeHelper`. No functional change (NFC).
This commit is contained in:
@@ -14,6 +14,7 @@
|
|||||||
#include "AMDGPURegBankLegalizeHelper.h"
|
#include "AMDGPURegBankLegalizeHelper.h"
|
||||||
#include "AMDGPUGlobalISelUtils.h"
|
#include "AMDGPUGlobalISelUtils.h"
|
||||||
#include "AMDGPUInstrInfo.h"
|
#include "AMDGPUInstrInfo.h"
|
||||||
|
#include "AMDGPULaneMaskUtils.h"
|
||||||
#include "AMDGPURegBankLegalizeRules.h"
|
#include "AMDGPURegBankLegalizeRules.h"
|
||||||
#include "AMDGPURegisterBankInfo.h"
|
#include "AMDGPURegisterBankInfo.h"
|
||||||
#include "GCNSubtarget.h"
|
#include "GCNSubtarget.h"
|
||||||
@@ -95,20 +96,7 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
|||||||
|
|
||||||
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
const SIRegisterInfo *TRI = ST.getRegisterInfo();
|
||||||
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
|
const TargetRegisterClass *WaveRC = TRI->getWaveMaskRegClass();
|
||||||
unsigned MovExecOpc, MovExecTermOpc, XorTermOpc, AndSaveExecOpc, ExecReg;
|
const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST);
|
||||||
if (IsWave32) {
|
|
||||||
MovExecOpc = AMDGPU::S_MOV_B32;
|
|
||||||
MovExecTermOpc = AMDGPU::S_MOV_B32_term;
|
|
||||||
XorTermOpc = AMDGPU::S_XOR_B32_term;
|
|
||||||
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
|
|
||||||
ExecReg = AMDGPU::EXEC_LO;
|
|
||||||
} else {
|
|
||||||
MovExecOpc = AMDGPU::S_MOV_B64;
|
|
||||||
MovExecTermOpc = AMDGPU::S_MOV_B64_term;
|
|
||||||
XorTermOpc = AMDGPU::S_XOR_B64_term;
|
|
||||||
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
|
|
||||||
ExecReg = AMDGPU::EXEC;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
const int OrigRangeSize = std::distance(BeginIt, EndIt);
|
const int OrigRangeSize = std::distance(BeginIt, EndIt);
|
||||||
@@ -270,7 +258,7 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
|||||||
B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
|
B.buildIntrinsic(Intrinsic::amdgcn_ballot, CondRegLM).addReg(CondReg);
|
||||||
|
|
||||||
// Update EXEC, save the original EXEC value to SavedExec.
|
// Update EXEC, save the original EXEC value to SavedExec.
|
||||||
B.buildInstr(AndSaveExecOpc)
|
B.buildInstr(LMC.AndSaveExecOpc)
|
||||||
.addDef(SavedExec)
|
.addDef(SavedExec)
|
||||||
.addReg(CondRegLM, RegState::Kill);
|
.addReg(CondRegLM, RegState::Kill);
|
||||||
MRI.setSimpleHint(SavedExec, CondRegLM);
|
MRI.setSimpleHint(SavedExec, CondRegLM);
|
||||||
@@ -278,7 +266,10 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
|||||||
B.setInsertPt(*BodyBB, BodyBB->end());
|
B.setInsertPt(*BodyBB, BodyBB->end());
|
||||||
|
|
||||||
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
|
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
|
||||||
B.buildInstr(XorTermOpc).addDef(ExecReg).addReg(ExecReg).addReg(SavedExec);
|
B.buildInstr(LMC.XorTermOpc)
|
||||||
|
.addDef(LMC.ExecReg)
|
||||||
|
.addReg(LMC.ExecReg)
|
||||||
|
.addReg(SavedExec);
|
||||||
|
|
||||||
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
|
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
|
||||||
// s_cbranch_scc0?
|
// s_cbranch_scc0?
|
||||||
@@ -288,11 +279,11 @@ bool RegBankLegalizeHelper::executeInWaterfallLoop(MachineIRBuilder &B,
|
|||||||
|
|
||||||
// Save the EXEC mask before the loop.
|
// Save the EXEC mask before the loop.
|
||||||
B.setInsertPt(MBB, MBB.end());
|
B.setInsertPt(MBB, MBB.end());
|
||||||
B.buildInstr(MovExecOpc).addDef(SaveExecReg).addReg(ExecReg);
|
B.buildInstr(LMC.MovOpc).addDef(SaveExecReg).addReg(LMC.ExecReg);
|
||||||
|
|
||||||
// Restore the EXEC mask after the loop.
|
// Restore the EXEC mask after the loop.
|
||||||
B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
|
B.setInsertPt(*RestoreExecBB, RestoreExecBB->begin());
|
||||||
B.buildInstr(MovExecTermOpc).addDef(ExecReg).addReg(SaveExecReg);
|
B.buildInstr(LMC.MovTermOpc).addDef(LMC.ExecReg).addReg(SaveExecReg);
|
||||||
|
|
||||||
// Set the insert point after the original instruction, so any new
|
// Set the insert point after the original instruction, so any new
|
||||||
// instructions will be in the remainder.
|
// instructions will be in the remainder.
|
||||||
|
|||||||
Reference in New Issue
Block a user