Files
llvm-project/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp
macurtis-amd b9ae01500d AMDGPU: Add NextUseAnalysis Pass (#178873)
Based on
- https://github.com/llvm/llvm-project/pull/156079 and
- https://github.com/llvm/llvm-project/pull/171520

See those PRs for background.

Provides a compatibility mode option
`--amdgpu-next-use-analysis-compatibility-mode` that produces results
that match either PR #156079 (`compute`) or PR #171520 (`graphics`).

Co-authored-by: alex-t <atimofee@amd.com>
Co-authored-by: Konstantina Mitropoulou <KonstantinaMitropoulou@amd.com>

---------

Co-authored-by: Konstantina Mitropoulou <KonstantinaMitropoulou@amd.com>
2026-04-16 12:05:59 -05:00

2615 lines
92 KiB
C++

//===---------------------- AMDGPUNextUseAnalysis.cpp ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AMDGPUNextUseAnalysis pass, a machine-level analysis
// that computes the distance from each instruction to the "nearest" next use of
// every live virtual register. These distances guide register spilling
// decisions by identifying which live values are furthest from their next use
// and are therefore the best candidates to spill.
//
// The analysis is based on the Braun & Hack CC'09 paper "Register Spilling and
// Live-Range Splitting for SSA-Form Programs."
//
// Key concepts:
//
// NextUseDistance A loop-depth-weighted instruction count representing
// how far away a register's next use is. Distances
// through deeper loops are scaled by fromLoopDepth() so
// that uses inside hot loops appear closer.
//
// Inter-block Pre-computed shortest weighted distances between all
// distances pairs of basic blocks, used to efficiently answer
// cross-block next-use queries. Each intermediate block
// is weighted by fromLoopDepth() applied once per loop
// boundary crossing relative to the destination.
//
// Configuration flags (see Config struct in the header):
//
// CountPhis Count PHI instructions toward distance and block size.
// ForwardOnly Restrict inter-block distances to forward-reachable
// paths.
// PreciseUseModeling Model PHI uses at their incoming edge block and filter
// uses with intermediate redefinitions.
// PromoteToPreheader Route loop-entry and inner-loop uses to the preheader.
//
// This file contains:
//
// - Command-line options for configuration and debug output
// - LiveRegUse / JSON helpers
// - AMDGPUNextUseAnalysisImpl (the main analysis implementation)
// - Instruction ID assignment and block size computation
// - CFG path pre-computation (reachability, loop depth, back-edges)
// - Inter-block distance computation
// - Per-register next-use distance queries and caching
// - AMDGPUNextUseAnalysis (public facade, pimpl)
// - Legacy and new pass manager wrappers
//
//===----------------------------------------------------------------------===//
#include "AMDGPUNextUseAnalysis.h"
#include "AMDGPU.h"
#include "GCNRegPressure.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <limits>
#include <string>
using namespace llvm;
#define DEBUG_TYPE "amdgpu-next-use-analysis"
//==============================================================================
// Options etc
//==============================================================================
namespace {
cl::opt<bool>
DistanceCacheEnabled("amdgpu-next-use-analysis-distance-cache",
cl::init(true), cl::Hidden,
cl::desc("Enable live-reg-use distance cache"));
cl::opt<std::string>
DumpNextUseDistanceAsJson("amdgpu-next-use-analysis-dump-distance-as-json",
cl::Hidden);
cl::opt<bool> DumpNextUseDistanceDefToUse(
"amdgpu-next-use-analysis-dump-distance-def-to-use", cl::init(false),
cl::Hidden);
cl::opt<bool>
DumpNextUseDistanceVerbose("amdgpu-next-use-analysis-dump-distance-verbose",
cl::init(false), cl::Hidden);
// 'graphics' and 'compute' modes arose due to initial competing implementations
// of next-use analysis that emphasized different types of workloads. This
// implementation is a compromise that combines aspects of both. Over time, the
// hope is we will be able to remove some of these differences and settle on a
// more unified implementation.
cl::opt<std::string>
ConfigPresetOpt("amdgpu-next-use-analysis-config", cl::Hidden,
cl::init("graphics"),
cl::desc("Config preset: 'graphics' or 'compute'"));
cl::opt<bool> ConfigCountPhisOpt(
"amdgpu-next-use-analysis-count-phis", cl::Hidden,
cl::desc("Count PHI instructions toward distance and block size"));
cl::opt<bool> ConfigForwardOnlyOpt(
"amdgpu-next-use-analysis-forward-only", cl::Hidden,
cl::desc("Restrict inter-block distances to forward-reachable paths"));
cl::opt<bool> ConfigPreciseUseModelingOpt(
"amdgpu-next-use-analysis-precise-use-modeling", cl::Hidden,
cl::desc("Model PHI uses via incoming edge block with loop-aware "
"reachability filtering"));
cl::opt<bool> ConfigPromoteToPreheaderOpt(
"amdgpu-next-use-analysis-use-preheader-model", cl::Hidden,
cl::desc("Promote loop-entry and inner-loop uses to the loop preheader"));
} // namespace
//==============================================================================
// LiveRegUse - Represents a live register use with its distance. Used for
// tracking and sorting register uses by distance.
//==============================================================================
namespace {
using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
struct LiveRegUse : public UseDistancePair {
// 'nullptr' indicates an unset/invalid state.
LiveRegUse() : UseDistancePair(nullptr, 0) {}
LiveRegUse(const MachineOperand *Use, NextUseDistance Dist)
: UseDistancePair(Use, Dist) {}
LiveRegUse(const UseDistancePair &P) : UseDistancePair(P) {}
bool isUnset() const { return Use == nullptr; }
Register getReg() const { return Use->getReg(); }
unsigned getSubReg() const { return Use->getSubReg(); }
LaneBitmask getLaneMask(const SIRegisterInfo *TRI) const {
return TRI->getSubRegIndexLaneMask(Use->getSubReg());
}
bool isCloserThan(const LiveRegUse &X) const {
if (Dist < X.Dist)
return true;
if (Dist > X.Dist)
return false;
if (Use == X.Use)
return false;
// Ugh. When !CountPhis, PHIs and the first non-PHI instruction have id
// 0. In this case, consider PHIs as less than the first non-PHI
// instruction.
const MachineInstr *ThisMI = Use->getParent();
const MachineInstr *XMI = X.Use->getParent();
const MachineBasicBlock *ThisMBB = ThisMI->getParent();
if (ThisMBB == XMI->getParent()) {
if (ThisMI->isPHI() && !XMI->isPHI() &&
XMI == &(*ThisMBB->getFirstNonPHI()))
return true;
}
// Ensure deterministic results
return X.getReg() < getReg();
}
void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr,
const MachineRegisterInfo *MRI = nullptr) const {
if (isUnset()) {
OS << "<unset>";
return;
}
Dist.print(OS);
OS << " [" << printReg(getReg(), TRI, getSubReg(), MRI) << "]";
}
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << '\n';
}
};
inline bool updateClosest(LiveRegUse &Closest, const LiveRegUse &X) {
if (!Closest.Use || X.isCloserThan(Closest)) {
Closest = X;
return true;
}
return false;
}
inline bool updateFurthest(LiveRegUse &Furthest, const LiveRegUse &X) {
if (!Furthest.Use || Furthest.isCloserThan(X)) {
Furthest = X;
return true;
}
return false;
}
} // namespace
//==============================================================================
// JSON helpers
//==============================================================================
namespace {
template <typename Lambda>
void printStringAttr(json::OStream &J, const char *Name, Lambda L) {
J.attributeBegin(Name);
raw_ostream &OS = J.rawValueBegin();
OS << '"';
L(OS);
OS << '"';
J.rawValueEnd();
J.attributeEnd();
}
void printStringAttr(json::OStream &J, const char *Name, Printable P) {
printStringAttr(J, Name, [&](raw_ostream &OS) { OS << P; });
}
void printStringAttr(json::OStream &J, const char *Name, const MachineInstr &MI,
ModuleSlotTracker &MST) {
printStringAttr(J, Name, [&](raw_ostream &OS) {
MI.print(OS, MST,
/* IsStandalone */ false,
/* SkipOpers */ false,
/* SkipDebugLoc */ false,
/* AddNewLine ---> */ false,
/* TargetInstrInfo */ nullptr);
});
}
void printMBBNameAttr(json::OStream &J, const char *Name,
const MachineBasicBlock &MBB, ModuleSlotTracker &MST) {
printStringAttr(J, Name, [&](raw_ostream &OS) {
MBB.printName(OS, MachineBasicBlock::PrintNameIr, &MST);
});
}
template <typename NameLambda, typename ValueT>
void printAttr(json::OStream &J, NameLambda NL, ValueT V) {
std::string Name;
raw_string_ostream NameOS(Name);
NL(NameOS);
J.attribute(NameOS.str(), V);
}
template <typename ValueT>
void printAttr(json::OStream &J, const Printable &P, ValueT V) {
printAttr(J, [&](raw_ostream &OS) { OS << P; }, V);
}
} // namespace
//==============================================================================
// AMDGPUNextUseAnalysisImpl
//==============================================================================
class llvm::AMDGPUNextUseAnalysisImpl {
public:
struct CacheableNextUseDistance {
bool IsInstrRelative;
NextUseDistance Distance;
};
static constexpr bool InstrRelative = true;
static constexpr bool InstrInvariant = false;
private:
const MachineFunction *MF = nullptr;
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
const MachineLoopInfo *MLI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
using InstrIdTy = unsigned;
using InstrToIdMap = DenseMap<const MachineInstr *, InstrIdTy>;
InstrToIdMap InstrToId;
AMDGPUNextUseAnalysis::Config Cfg;
void initializeTables() {
for (const MachineBasicBlock &BB : *MF)
calcInstrIds(&BB, InstrToId);
initializeCfgPaths();
initializeInterBlockDistances();
}
void clearTables() {
InstrToId.clear();
RegUseMap.clear();
Paths.clear();
resetDistanceCache();
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Instruction Ids
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
unsigned sizeOf(const MachineInstr &MI) const {
// When !Cfg.CountPhis, PHIs do not contribute to distances/sizes since they
// generally don't result in the generation of a machine instruction.
// FIXME: Consider using MI.isPseudo() or maybe MI.isMetaInstruction().
return Cfg.CountPhis ? 1 : !MI.isPHI();
}
void calcInstrIds(const MachineBasicBlock *BB,
InstrToIdMap &MutableInstrToId) const {
InstrIdTy Id = 0;
for (auto &MI : BB->instrs()) {
MutableInstrToId[&MI] = Id;
Id += sizeOf(MI);
}
}
/// Returns MI's instruction Id. It renumbers (part of) the BB if MI is not
/// found in the map.
InstrIdTy getInstrId(const MachineInstr *MI) const {
auto It = InstrToId.find(MI);
if (It != InstrToId.end())
return It->second;
// Renumber the MBB.
// TODO: Renumber from MI onwards.
auto &MutableInstrToId = const_cast<InstrToIdMap &>(InstrToId);
calcInstrIds(MI->getParent(), MutableInstrToId);
return InstrToId.find(MI)->second;
}
// Length of the segment from MI (inclusive) to the first instruction of the
// basic block.
InstrIdTy getHeadLen(const MachineInstr *MI) const {
const MachineBasicBlock *MBB = MI->getParent();
return getInstrId(MI) + getInstrId(&MBB->instr_front()) + 1;
}
// Length of the segment from MI (exclusive) to the last instruction of the
// basic block.
InstrIdTy getTailLen(const MachineInstr *MI) const {
const MachineBasicBlock *MBB = MI->getParent();
return getInstrId(&MBB->instr_back()) - getInstrId(MI);
}
// Length of the segment from 'From' to 'To' (exclusive). Both instructions
// must be in the same basic block.
InstrIdTy getDistance(const MachineInstr *From,
const MachineInstr *To) const {
assert(From->getParent() == To->getParent());
return getInstrId(To) - getInstrId(From);
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// RegUses - cache of uses by register
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
DenseMap<Register, SmallVector<const MachineOperand *>> RegUseMap;
const SmallVector<const MachineOperand *> &
getRegisterUses(Register Reg) const {
auto I = RegUseMap.find(Reg);
if (I != RegUseMap.end())
return I->second;
auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
SmallVector<const MachineOperand *> &Uses = NonConstThis->RegUseMap[Reg];
for (const MachineOperand &UseMO : MRI->use_nodbg_operands(Reg)) {
if (!UseMO.isUndef())
Uses.push_back(&UseMO);
}
return Uses;
}
bool hasAtLeastOneUse(Register Reg) const {
return !getRegisterUses(Reg).empty();
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Paths
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
class Path {
using StorageTy =
std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
StorageTy P;
public:
constexpr Path() : P(nullptr, nullptr) {}
constexpr Path(const MachineBasicBlock *Src, const MachineBasicBlock *Dst)
: P(Src, Dst) {}
Path(const StorageTy &Pair) : P(Pair) {}
constexpr operator const StorageTy &() const { return P; }
using DenseMapInfo = llvm::DenseMapInfo<StorageTy>;
const MachineBasicBlock *src() const { return P.first; }
const MachineBasicBlock *dst() const { return P.second; }
};
enum class EdgeKind { Back = -1, None = 0, Forward = 1 };
static constexpr StringRef toString(EdgeKind EK) {
if (EK == EdgeKind::Back)
return "back";
if (EK == EdgeKind::Forward)
return "fwd";
return "none";
}
struct PathInfo {
EdgeKind EK;
bool Reachable;
int ForwardReachable;
unsigned RelativeLoopDepth;
std::optional<NextUseDistance> ShortestDistance;
std::optional<NextUseDistance> ShortestUnweightedDistance;
InstrIdTy Size;
PathInfo()
: EK(EdgeKind::None), Reachable(false), ForwardReachable(-1),
RelativeLoopDepth(0), Size(0) {}
bool isBackedge() const { return EK == EdgeKind::Back; }
bool isForwardReachableSet() const { return 0 <= ForwardReachable; }
bool isForwardReachableUnset() const { return ForwardReachable < 0; }
bool isForwardReachable() const { return ForwardReachable == 1; }
bool isNotForwardReachable() const { return ForwardReachable == 0; }
void print(raw_ostream &OS) const {
OS << "{ek=" << toString(EK) << " reach=" << Reachable
<< " fwd-reach=" << ForwardReachable
<< " loop-depth=" << RelativeLoopDepth << " size=" << Size;
if (ShortestDistance) {
OS << " shortest-dist=";
ShortestDistance->print(OS);
}
if (ShortestUnweightedDistance) {
OS << " shortest-unweighted-dist=";
ShortestUnweightedDistance->print(OS);
}
OS << "}";
}
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << '\n';
}
};
//----------------------------------------------------------------------------
// Path Storage - 'Paths' is lazily populated and some members are lazily
// computed. All mutations should go through one of the 'initializePathInfo*'
// flavors below.
//----------------------------------------------------------------------------
DenseMap<Path, PathInfo, Path::DenseMapInfo> Paths;
const PathInfo *maybePathInfoFor(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
auto I = Paths.find({From, To});
return I == Paths.end() ? nullptr : &I->second;
}
PathInfo &getOrInitPathInfo(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
auto &MutablePaths = NonConstThis->Paths;
Path P(From, To);
auto [I, Inserted] = MutablePaths.try_emplace(P);
if (!Inserted)
return I->second;
bool Reachable = calcIsReachable(P.src(), P.dst());
// Iterator may have been invalidated by calcIsReachable, so get a fresh
// reference to the slot.
return NonConstThis->initializePathInfo(MutablePaths.at(P), P,
EdgeKind::None, Reachable);
}
const PathInfo &pathInfoFor(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return getOrInitPathInfo(From, To);
}
//----------------------------------------------------------------------------
// initializePathInfo* - various flavors of PathInfo initialization. They
// (should) always funnel to the first flavor below.
//----------------------------------------------------------------------------
PathInfo &initializePathInfo(PathInfo &Slot, Path P, EdgeKind EK,
bool Reachable) {
Slot.EK = EK;
Slot.Reachable = Reachable;
Slot.ForwardReachable = EK == EdgeKind::None ? -1 : EK == EdgeKind::Forward;
Slot.RelativeLoopDepth =
Slot.Reachable ? calcRelativeLoopDepth(P.src(), P.dst()) : 0;
Slot.Size = P.src() == P.dst() ? calcSize(P.src()) : 0;
if (EK != EdgeKind::None)
Slot.ShortestUnweightedDistance = 0;
return Slot;
}
PathInfo &initializePathInfo(Path P, EdgeKind EK, bool Reachable) const {
auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
auto &MutablePaths = NonConstThis->Paths;
return NonConstThis->initializePathInfo(MutablePaths[P], P, EK, Reachable);
}
std::pair<PathInfo *, bool> maybeInitializePathInfo(Path P, EdgeKind EK,
bool Reachable) const {
auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
auto &MutablePaths = NonConstThis->Paths;
auto [I, Inserted] = MutablePaths.try_emplace(P);
if (Inserted)
NonConstThis->initializePathInfo(I->second, P, EK, Reachable);
return {&I->second, Inserted};
}
bool initializePathInfoForwardReachable(const MachineBasicBlock *From,
const MachineBasicBlock *To,
bool Value) const {
PathInfo &Slot = getOrInitPathInfo(From, To);
assert(Slot.isForwardReachableUnset());
Slot.ForwardReachable = Value;
return Value;
}
NextUseDistance
initializePathInfoShortestDistance(const MachineBasicBlock *From,
const MachineBasicBlock *To,
NextUseDistance Value) const {
PathInfo &Slot = getOrInitPathInfo(From, To);
assert(!Slot.ShortestDistance.has_value());
Slot.ShortestDistance = Value;
return Value;
}
NextUseDistance
initializePathInfoShortestUnweightedDistance(const MachineBasicBlock *From,
const MachineBasicBlock *To,
NextUseDistance Value) const {
PathInfo &Slot = getOrInitPathInfo(From, To);
assert(!Slot.ShortestUnweightedDistance.has_value());
Slot.ShortestUnweightedDistance = Value;
return Value;
}
//----------------------------------------------------------------------------
// initialize*Paths
//----------------------------------------------------------------------------
private:
void initializePaths(const SmallVector<Path> &ReachablePaths,
const SmallVector<Path> &UnreachablePaths) const {
for (const Path &P : ReachablePaths)
initializePathInfo(P, EdgeKind::None, true);
for (const Path &P : UnreachablePaths)
initializePathInfo(P, EdgeKind::None, false);
}
void
initializeForwardOnlyPaths(const SmallVector<Path> &ReachablePaths,
const SmallVector<Path> &UnreachablePaths) const {
for (bool R : {true, false}) {
const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
for (const Path &P : ToInit) {
PathInfo &Slot = getOrInitPathInfo(P.src(), P.dst());
assert(Slot.isForwardReachableUnset() || Slot.ForwardReachable == R);
Slot.ForwardReachable = R;
}
}
}
// Follow the control flow graph starting at the entry block until all blocks
// have been visited. Along the way, initialize the PathInfo for each edge
// traversed.
void initializeCfgPaths() {
Paths.clear();
enum VisitState { Undiscovered, Visiting, Finished };
DenseMap<const MachineBasicBlock *, VisitState> State;
SmallVector<const MachineBasicBlock *> Work{&MF->front()};
State[&MF->front()] = Undiscovered;
while (!Work.empty()) {
const MachineBasicBlock *Src = Work.back();
VisitState &SrcState = State[Src];
// A block may already be 'Finished' if it is reachable from multiple
// predecessors causing it to be pushed more than once while still
// 'Undiscovered'.
if (SrcState == Visiting || SrcState == Finished) {
Work.pop_back();
SrcState = Finished;
continue;
}
SrcState = Visiting;
for (const MachineBasicBlock *Dst : Src->successors()) {
const VisitState DstState = State.lookup(Dst);
EdgeKind EK;
if (DstState == Undiscovered) {
EK = EdgeKind::Forward;
Work.push_back(Dst);
} else if (DstState == Visiting) {
EK = EdgeKind::Back;
} else {
EK = EdgeKind::Forward;
}
Path P(Src, Dst);
assert(!Paths.contains(P));
initializePathInfo(P, EK, /*Reachable*/ true);
}
}
LLVM_DEBUG(dumpPaths());
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Loop helpers
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
static bool isStandAloneLoop(const MachineLoop *Loop) {
return Loop->getSubLoops().empty() && Loop->isOutermost();
}
static MachineLoop *findChildLoop(MachineLoop *const Parent,
MachineLoop *Descendant) {
for (MachineLoop *L = Descendant; L != Parent; L = L->getParentLoop()) {
if (L->getParentLoop() == Parent)
return L;
}
return nullptr;
}
// If loops 'A' and 'B' share a common parent loop, return that loop and the
// depth of 'A' relative to it. Otherwise return nullptr and the loop depth of
// 'A'.
static std::pair<MachineLoop *, unsigned>
findCommonParent(MachineLoop *A, const MachineLoop *B) {
unsigned Depth = 0;
for (; A != nullptr; A = A->getParentLoop(), ++Depth) {
if (A->contains(B))
break;
}
return {A, Depth};
}
static const MachineBasicBlock *
getOutermostPreheader(const MachineLoop *Loop) {
return Loop ? Loop->getOutermostLoop()->getLoopPreheader() : nullptr;
}
static MachineBasicBlock *findChildPreheader(MachineLoop *const Parent,
MachineLoop *Descendant) {
MachineLoop *ChildLoop = findChildLoop(Parent, Descendant);
return ChildLoop ? ChildLoop->getLoopPreheader() : nullptr;
}
static const MachineBasicBlock *
getIncomingBlockIfPhiUse(const MachineInstr *MI, const MachineOperand *MO) {
return MI->isPHI() ? MI->getOperand(MO->getOperandNo() + 1).getMBB()
: nullptr;
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Calculate features
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
InstrIdTy calcSize(const MachineBasicBlock *BB) const {
InstrIdTy Size = BB->size();
if (!Cfg.CountPhis)
Size -= std::distance(BB->begin(), BB->getFirstNonPHI());
return Size;
}
NextUseDistance calcWeightedSize(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return NextUseDistance::fromSize(getSize(From),
getRelativeLoopDepth(From, To));
}
// Return the loop depth of 'From' relative to 'To'.
unsigned calcRelativeLoopDepth(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
MachineLoop *LoopFrom = MLI->getLoopFor(From);
MachineLoop *LoopTo = MLI->getLoopFor(To);
if (!LoopFrom)
return 0;
if (!LoopTo)
return LoopFrom->getLoopDepth();
if (LoopFrom->contains(LoopTo)) // covers LoopFrom == LoopTo
return 0;
if (LoopTo->contains(LoopFrom))
return LoopFrom->getLoopDepth() - LoopTo->getLoopDepth();
// Loops are siblings of some sort.
return findCommonParent(LoopFrom, LoopTo).second;
}
// Attempt to find a path from 'From' to 'To' using a depth first search. If
// 'ForwardOnly' is true, do not follow backedges. As a performance
// improvement, this may initialize reachable intermediate paths or paths we
// determine are unreachable.
bool calcIsReachable(const MachineBasicBlock *From,
const MachineBasicBlock *To,
bool ForwardOnly = false) const {
if (From == To && !MLI->getLoopFor(From))
return false;
if (!ForwardOnly && interBlockDistanceExists(From, To))
return true;
enum { VisitOp, PopOp };
using MBBOpPair = std::pair<const MachineBasicBlock *, int>;
SmallVector<MBBOpPair> Work{{From, VisitOp}};
DenseSet<const MachineBasicBlock *> Visited{From};
SmallVector<Path> IntermediatePath;
SmallVector<Path> Unreachable;
// Should be run at every function exit point.
auto Finally = [&](bool Reachable) {
// This is an optimization. For intermediate paths we found while
// calculating reachability for 'From' --> 'To', remember their
// reachability.
if (!Reachable) {
IntermediatePath.clear();
for (const MachineBasicBlock *MBB : Visited) {
if (MBB != From)
Unreachable.emplace_back(MBB, To);
}
}
if (ForwardOnly)
initializeForwardOnlyPaths(IntermediatePath, Unreachable);
else
initializePaths(IntermediatePath, Unreachable);
return Reachable;
};
while (!Work.empty()) {
auto [Current, Op] = Work.pop_back_val();
// Backtracking
if (Op == PopOp) {
IntermediatePath.pop_back();
if (ForwardOnly)
Unreachable.emplace_back(Current, To);
continue;
}
if (Current->succ_empty())
continue;
if (Current != From) {
IntermediatePath.emplace_back(Current, To);
Work.emplace_back(Current, PopOp);
}
for (const MachineBasicBlock *Succ : Current->successors()) {
if (ForwardOnly && isBackedge(Current, Succ))
continue;
if (Succ == To)
return Finally(true);
if (auto CachedReachable = isMaybeReachable(Succ, To, ForwardOnly)) {
if (CachedReachable.value())
return Finally(true);
Visited.insert(Succ);
continue;
}
if (Visited.insert(Succ).second)
Work.emplace_back(Succ, VisitOp);
}
}
return Finally(false);
}
//----------------------------------------------------------------------------
// Inter-block distance - the weighted and unweighted cost (i.e. "distance")
// to travel from one MachineBasicBlock to another.
//
// Values are pre-computed and stored in 'InterBlockDistances' using a
// backwards data-flow algorithm similar to the one described in 4.1 of a
// "Register Spilling and Live-Range Splitting for SSA-Form Programs" by
// Matthias Braun and Sebastian Hack, CC'09. This replaced a prior
// implementation based on Dijkstra's shortest path algorithm.
//----------------------------------------------------------------------------
private:
struct InterBlockDistance {
NextUseDistance Weighted;
NextUseDistance Unweighted;
InterBlockDistance() : Weighted(-1), Unweighted(-1) {}
InterBlockDistance(NextUseDistance W, NextUseDistance UW)
: Weighted(W), Unweighted(UW) {}
bool operator==(const InterBlockDistance &Other) const {
return Weighted == Other.Weighted && Unweighted == Other.Unweighted;
}
bool operator!=(const InterBlockDistance &Other) const {
return !(*this == Other);
}
void print(raw_ostream &OS) const {
OS << "{W=";
Weighted.print(OS);
OS << " U=";
Unweighted.print(OS);
OS << "}";
}
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << '\n';
}
};
using InterBlockDistanceMap =
DenseMap<unsigned, DenseMap<unsigned, InterBlockDistance>>;
InterBlockDistanceMap InterBlockDistances;
void initializeInterBlockDistances() {
InterBlockDistanceMap Distances;
bool Changed;
do {
Changed = false;
for (const MachineBasicBlock *MBB : post_order(MF)) {
unsigned MBBNum = MBB->getNumber();
// Save previous state for convergence check
InterBlockDistanceMap::mapped_type Prev = std::move(Distances[MBBNum]);
InterBlockDistanceMap::mapped_type Curr;
Curr.reserve(Prev.size());
// Direct successors are distance 0 by definition: no instructions are
// executed between exiting MBB and entering Succ.
for (const MachineBasicBlock *Succ : MBB->successors())
Curr[Succ->getNumber()] = InterBlockDistance(0, 0);
// Propagate further destinations through each successor.
for (const MachineBasicBlock *Succ : MBB->successors()) {
unsigned SuccNum = Succ->getNumber();
const unsigned UnweightedSize{getSize(Succ)};
for (const auto &[DestBlockNum, DestDist] : Distances[SuccNum]) {
// MBB -> MBB is considered unreachable (getInterBlockDistance
// asserts From != To).
if (DestBlockNum == MBBNum)
continue;
const MachineBasicBlock *DestMBB =
MF->getBlockNumbered(DestBlockNum);
const NextUseDistance UnweightedDist{UnweightedSize +
DestDist.Unweighted};
unsigned SuccToDestLoopDepth = calcRelativeLoopDepth(Succ, DestMBB);
const NextUseDistance WeightedDist =
DestDist.Weighted +
NextUseDistance::fromSize(UnweightedSize, SuccToDestLoopDepth);
// Insert or update distances (take minimum)
auto [I, First] =
Curr.try_emplace(DestBlockNum, WeightedDist, UnweightedDist);
if (!First) {
InterBlockDistance &Slot = I->second;
Slot.Weighted = min(Slot.Weighted, WeightedDist);
Slot.Unweighted = min(Slot.Unweighted, UnweightedDist);
}
}
}
Changed |= (Prev != Curr);
Distances[MBBNum] = std::move(Curr);
}
} while (Changed);
InterBlockDistances = std::move(Distances);
LLVM_DEBUG(dumpInterBlockDistances());
}
const InterBlockDistance *
getInterBlockDistanceMapValue(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
auto I = InterBlockDistances.find(From->getNumber());
if (I == InterBlockDistances.end())
return nullptr;
const InterBlockDistanceMap::mapped_type &FromSlot = I->second;
auto J = FromSlot.find(To->getNumber());
return J == FromSlot.end() ? nullptr : &J->second;
}
bool interBlockDistanceExists(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return getInterBlockDistanceMapValue(From, To);
}
NextUseDistance getInterBlockDistance(const MachineBasicBlock *From,
const MachineBasicBlock *To,
bool Unweighted) const {
assert(From != To && "The basic blocks should be different.");
if (!From || !To)
return NextUseDistance::unreachable();
if (Cfg.ForwardOnly && !isForwardReachable(From, To))
return NextUseDistance::unreachable();
const InterBlockDistance *BD = getInterBlockDistanceMapValue(From, To);
if (!BD)
return NextUseDistance::unreachable();
return Unweighted ? BD->Unweighted : BD->Weighted;
}
NextUseDistance
getWeightedInterBlockDistance(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return getInterBlockDistance(From, To, false);
}
NextUseDistance
getUnweightedInterBlockDistance(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return getInterBlockDistance(From, To, true);
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Feature getters. Use cached results if available. If not calculate.
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
InstrIdTy getSize(const MachineBasicBlock *BB) const {
return pathInfoFor(BB, BB).Size;
}
bool isReachable(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return pathInfoFor(From, To).Reachable;
}
bool isReachableOrSame(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return From == To || pathInfoFor(From, To).Reachable;
}
bool isForwardReachable(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
const PathInfo &PI = pathInfoFor(From, To);
if (PI.isForwardReachableSet())
return PI.isForwardReachable();
return initializePathInfoForwardReachable(
From, To,
PI.Reachable && calcIsReachable(From, To, /*ForwardOnly*/ true));
}
// Return true/false if we know that 'To' is reachable or not from
// 'From'. Otherwise return 'std::nullopt'.
std::optional<bool> isMaybeReachable(const MachineBasicBlock *From,
const MachineBasicBlock *To,
bool ForwardOnly) const {
const PathInfo *PI = maybePathInfoFor(From, To);
if (!PI)
return std::nullopt;
if (ForwardOnly) {
if (PI->isForwardReachable())
return true;
if (PI->isNotForwardReachable())
return false;
return std::nullopt;
}
return PI->Reachable;
}
bool isBackedge(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return pathInfoFor(From, To).isBackedge();
}
// Can be used as a substitute for DT->dominates(A, B) if A and B are in the
// same basic block.
bool instrsAreInOrder(const MachineInstr *A, const MachineInstr *B) const {
assert(A->getParent() == B->getParent() &&
"instructions must be in the same basic block!");
if (A == B || getInstrId(A) < getInstrId(B))
return true;
if (!A->isPHI())
return false;
if (!B->isPHI())
return true;
for (auto &PHI : A->getParent()->phis()) {
if (&PHI == A)
return true;
if (&PHI == B)
return false;
}
return false;
}
unsigned getRelativeLoopDepth(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
return pathInfoFor(From, To).RelativeLoopDepth;
}
NextUseDistance getShortestPath(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
std::optional<NextUseDistance> MaybeD =
pathInfoFor(From, To).ShortestDistance;
if (MaybeD.has_value())
return MaybeD.value();
NextUseDistance Dist = getWeightedInterBlockDistance(From, To);
return initializePathInfoShortestDistance(From, To, Dist);
}
NextUseDistance getShortestUnweightedPath(const MachineBasicBlock *From,
const MachineBasicBlock *To) const {
std::optional<NextUseDistance> MaybeD =
pathInfoFor(From, To).ShortestUnweightedDistance;
if (MaybeD.has_value())
return MaybeD.value();
return initializePathInfoShortestUnweightedDistance(
From, To, getUnweightedInterBlockDistance(From, To));
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/// MBBDistPair - Represents the distance to a machine basic block.
/// Used for returning both the distance and the target block together.
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
struct MBBDistPair {
NextUseDistance Distance;
const MachineBasicBlock *MBB;
MBBDistPair() : Distance(NextUseDistance::unreachable()), MBB(nullptr) {}
MBBDistPair(NextUseDistance D, const MachineBasicBlock *B)
: Distance(D), MBB(B) {}
MBBDistPair operator+(NextUseDistance D) { return {Distance + D, MBB}; }
MBBDistPair &operator+=(NextUseDistance D) {
Distance += D;
return *this;
}
void print(raw_ostream &OS) const {
OS << "{";
Distance.print(OS);
if (MBB)
OS << " " << printMBBReference(*MBB);
else
OS << " <null>";
OS << "}";
}
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << '\n';
}
};
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// CFG Helpers
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
// Return the shortest distance to a latch
MBBDistPair calcShortestDistanceToLatch(const MachineBasicBlock *CurMBB,
const MachineLoop *CurLoop) const {
SmallVector<MachineBasicBlock *, 2> Latches;
CurLoop->getLoopLatches(Latches);
MBBDistPair LD;
for (MachineBasicBlock *LMBB : Latches) {
if (LMBB == CurMBB)
return {0, CurMBB};
NextUseDistance Dst = getShortestPath(CurMBB, LMBB);
if (Dst < LD.Distance) {
LD.Distance = Dst;
LD.MBB = LMBB;
}
}
return LD;
}
// Return the shortest unweighted distance to a latch
MBBDistPair
calcShortestUnweightedDistanceToLatch(const MachineBasicBlock *CurMBB,
const MachineLoop *CurLoop) const {
SmallVector<MachineBasicBlock *, 2> Latches;
CurLoop->getLoopLatches(Latches);
MBBDistPair LD;
for (MachineBasicBlock *LMBB : Latches) {
if (LMBB == CurMBB)
return {0, CurMBB};
NextUseDistance Dst = getShortestUnweightedPath(CurMBB, LMBB);
if (Dst < LD.Distance) {
LD.Distance = Dst;
LD.MBB = LMBB;
}
}
return LD;
}
// Return the shortest distance to an exit
MBBDistPair calcShortestDistanceToExit(const MachineBasicBlock *CurMBB,
const MachineLoop *CurLoop) const {
SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ExitEdges;
CurLoop->getExitEdges(ExitEdges);
MBBDistPair LD;
for (auto [Exit, Dest] : ExitEdges) {
if (Exit == CurMBB)
return {0, CurMBB};
NextUseDistance Dst = getShortestPath(CurMBB, Exit);
if (Dst < LD.Distance) {
LD.Distance = Dst;
LD.MBB = Exit;
}
}
return LD;
}
// Return the shortest distance through a loop (header to latch) that goes
// through CurMBB.
MBBDistPair
calcShortestDistanceThroughInnermostLoop(const MachineBasicBlock *CurMBB,
MachineLoop *CurLoop) const {
assert(MLI->getLoopFor(CurMBB) == CurLoop);
// This is a hot spot. Check it before doing anything else.
if (CurLoop->getNumBlocks() == 1)
return {getSize(CurMBB), CurMBB};
MachineBasicBlock *LoopHeader = CurLoop->getHeader();
MBBDistPair LD{0, nullptr};
LD += getSize(LoopHeader);
if (CurMBB != LoopHeader)
LD += getShortestPath(LoopHeader, CurMBB);
if (CurLoop->isLoopExiting(CurMBB))
LD.MBB = CurMBB;
else
LD = calcShortestDistanceToExit(CurMBB, CurLoop) + LD.Distance;
if (CurMBB != LoopHeader && CurMBB != LD.MBB)
LD += getSize(CurMBB);
if (LD.MBB != LoopHeader)
LD += getSize(LD.MBB);
return LD;
}
// Return the shortest distance through a loop (header to latch) that goes
// through CurMBB.
MBBDistPair calcShortestDistanceThroughLoop(const MachineBasicBlock *CurMBB,
MachineLoop *OuterLoop) const {
MachineLoop *CurLoop = MLI->getLoopFor(CurMBB);
MBBDistPair CurLD =
calcShortestDistanceThroughInnermostLoop(CurMBB, CurLoop);
MachineBasicBlock *CurHdr = CurLoop->getHeader();
for (;;) {
if (OuterLoop == CurLoop)
return CurLD;
MachineLoop *ParentLoop = CurLoop->getParentLoop();
MachineBasicBlock *ParentHdr = ParentLoop->getHeader();
MBBDistPair LD{0, nullptr};
LD += getSize(ParentHdr);
LD += getShortestPath(ParentHdr, CurHdr);
LD += CurLD.Distance.applyLoopWeight();
LD = calcShortestDistanceToExit(CurLD.MBB, ParentLoop) + LD.Distance;
LD += getSize(LD.MBB);
CurLD = LD;
CurLoop = ParentLoop;
CurHdr = ParentHdr;
}
llvm_unreachable("CurMBB not contained in OuterLoop");
}
// Similar to calcShortestDistanceThroughLoop with LoopWeight applied to the
// returned distance.
MBBDistPair
calcWeightedDistanceThroughLoopViaMBB(const MachineBasicBlock *CurMBB,
MachineLoop *CurLoop) const {
MBBDistPair LD = calcShortestDistanceThroughLoop(CurMBB, CurLoop);
LD.Distance = LD.Distance.applyLoopWeight();
return LD;
}
// Return the weighted, shortest distance through a loop (header to latch).
// If ParentLoop is provided, use it to adjust the loop depth.
MBBDistPair calcWeightedDistanceThroughLoop(
const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
const MachineLoop *ParentLoop = nullptr) const {
if (CurLoop->getNumBlocks() != 1)
return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);
unsigned LoopDepth = MLI->getLoopDepth(CurMBB);
if (ParentLoop)
LoopDepth -= ParentLoop->getLoopDepth();
return {NextUseDistance::fromSize(getSize(CurMBB), LoopDepth),
CurLoop->getLoopLatch()};
}
// Calculate total distance from exit point to use instruction
NextUseDistance appendDistanceToUse(const MBBDistPair &Exit,
const MachineInstr *UseMI,
const MachineBasicBlock *UseMBB) const {
return Exit.Distance + getShortestPath(Exit.MBB, UseMBB) +
getHeadLen(UseMI);
}
// Return the weighted, shortest distance through the CurLoop which is a
// sub-loop of UseLoop.
MBBDistPair calcDistanceThroughSubLoopUse(const MachineBasicBlock *CurMBB,
MachineLoop *CurLoop,
MachineLoop *UseLoop) const {
// All the sub-loops of the UseLoop will be executed before the use.
// Hence, we should take this into consideration in distance calculation.
MachineLoop *UseLoopSubLoop = findChildLoop(UseLoop, CurLoop);
assert(UseLoopSubLoop && "CurLoop should be nested in UseLoop");
return calcWeightedDistanceThroughLoop(CurMBB, UseLoopSubLoop, UseLoop);
}
// Similar to calcDistanceThroughSubLoopUse, adding the distance to 'UseMI'.
NextUseDistance calcDistanceThroughSubLoopToUseMI(
const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
MachineLoop *UseLoop) const {
return appendDistanceToUse(
calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop), UseMI, UseMBB);
}
// Return the weighted distance through a loop to an outside use loop.
// Differentiates between uses inside or outside of the current loop nest.
MBBDistPair calcDistanceThroughLoopToOutsideLoopUse(
const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
assert(!CurLoop->contains(UseLoop));
if (isStandAloneLoop(CurLoop))
return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);
MachineLoop *OutermostLoop = CurLoop->getOutermostLoop();
if (!OutermostLoop->contains(UseLoop)) {
// We should take into consideration the whole loop nest in the
// calculation of the distance because we will reach the use after
// executing the whole loop nest.
// ... But make sure that we pick a route that goes through CurMBB
return calcWeightedDistanceThroughLoopViaMBB(CurMBB, OutermostLoop);
}
// At this point we know that CurLoop and UseLoop are independent and they
// are in the same loop nest.
if (MLI->getLoopDepth(CurMBB) <= MLI->getLoopDepth(UseMBB))
return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);
assert(CurLoop != OutermostLoop && "The loop cannot be the outermost.");
const unsigned UseLoopDepth = MLI->getLoopDepth(UseMBB);
for (;;) {
if (CurLoop->getLoopDepth() == UseLoopDepth)
break;
CurLoop = CurLoop->getParentLoop();
if (CurLoop == OutermostLoop)
break;
}
return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);
}
// Similar to calcDistanceThroughLoopToOutsideLoopUse but adds the distance to
// an instruction in the loop.
NextUseDistance calcDistanceThroughLoopToOutsideLoopUseMI(
const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
MachineLoop *UseLoop) const {
return appendDistanceToUse(calcDistanceThroughLoopToOutsideLoopUse(
CurMBB, CurLoop, UseMBB, UseLoop),
UseMI, UseMBB);
}
// Return true if 'MO' is covered by 'LaneMask'
bool machineOperandCoveredBy(const MachineOperand &MO,
LaneBitmask LaneMask) const {
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
return (Mask & LaneMask) == Mask;
}
// Returns true iff uses of LiveReg/LiveLaneMask in PHI UseMI are coming from
// a backedge when starting at CurMI.
bool isIncomingValFromBackedge(Register LiveReg, LaneBitmask LiveLaneMask,
const MachineInstr *CurMI,
const MachineInstr *UseMI) const {
if (!UseMI->isPHI())
return false;
MachineLoop *CurLoop = MLI->getLoopFor(CurMI->getParent());
MachineLoop *UseLoop = MLI->getLoopFor(UseMI->getParent());
// Not a backedge if ...
// A: not in a loop at all
// B: or CurMI is in a loop outside of UseLoop
// C: or UseMI is not in the UseLoop header
if (/*A:*/ !UseLoop ||
/*B:*/ (CurLoop && !UseLoop->contains(CurLoop)) ||
/*C:*/ UseMI->getParent() != UseLoop->getHeader())
return false;
SmallVector<MachineBasicBlock *, 2> Latches;
UseLoop->getLoopLatches(Latches);
const unsigned NumOps = UseMI->getNumOperands();
for (unsigned I = 1; I < NumOps; I += 2) {
const MachineOperand &RegMO = UseMI->getOperand(I - 1);
const MachineOperand &MBBMO = UseMI->getOperand(I);
assert(RegMO.isReg() && "Expected register operand of PHI");
assert(MBBMO.isMBB() && "Expected MBB operand of PHI");
if (RegMO.getReg() == LiveReg &&
machineOperandCoveredBy(RegMO, LiveLaneMask)) {
MachineBasicBlock *IncomingBB = MBBMO.getMBB();
if (llvm::is_contained(Latches, IncomingBB))
return true;
}
}
return false;
}
// Return the distance from 'CurMI' through a parent loop backedge PHI Use
// ('UseMI').
CacheableNextUseDistance calcDistanceViaEnclosingBackedge(
const MachineInstr *CurMI, const MachineBasicBlock *CurMBB,
MachineLoop *CurLoop, const MachineInstr *UseMI,
const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
assert(UseLoop && "There is no backedge.");
assert(CurLoop && (UseLoop != CurLoop) && UseLoop->contains(CurLoop) &&
"Unexpected loop configuration");
InstrIdTy UseHeadLen = getHeadLen(UseMI);
MBBDistPair InnerLoopLD =
calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop);
MBBDistPair LD = calcShortestDistanceToLatch(InnerLoopLD.MBB, UseLoop);
return {InstrInvariant,
InnerLoopLD.Distance + LD.Distance + getSize(LD.MBB) + UseHeadLen};
}
// Optimized version of calcBackedgeDistance when we already know that CurMI
// and UseMI are in the same basic block
NextUseDistance calcBackedgeDistance(const MachineInstr *CurMI,
const MachineBasicBlock *CurMBB,
MachineLoop *CurLoop,
const MachineInstr *UseMI) const {
// use is in the next loop iteration
InstrIdTy CurTailLen = getTailLen(CurMI);
InstrIdTy UseHeadLen = getHeadLen(UseMI);
MBBDistPair LD = calcShortestUnweightedDistanceToLatch(CurMBB, CurLoop);
const MachineBasicBlock *HdrMBB = CurLoop->getHeader();
NextUseDistance Hdr = CurMBB == HdrMBB ? 0 : getSize(HdrMBB);
NextUseDistance Dst =
CurMBB == HdrMBB ? 0 : getShortestUnweightedPath(HdrMBB, CurMBB);
return CurTailLen + LD.Distance + getSize(LD.MBB) + Hdr + Dst + UseHeadLen;
}
//----------------------------------------------------------------------------
// Calculate inter-instruction distances
//----------------------------------------------------------------------------
private:
// Calculate the shortest weighted path from MachineInstruction 'FromMI' to
// 'ToMI'. It is weighted distance in that paths that exit loops are made to
// look much further away.
NextUseDistance calcShortestDistance(const MachineInstr *FromMI,
const MachineInstr *ToMI) const {
const MachineBasicBlock *FromMBB = FromMI->getParent();
const MachineBasicBlock *ToMBB = ToMI->getParent();
if (FromMBB == ToMBB) {
NextUseDistance RV = getDistance(FromMI, ToMI);
assert(RV >= 0 && "unexpected negative distance from getDistance");
return RV;
}
InstrIdTy FromTailLen = getTailLen(FromMI);
InstrIdTy ToHeadLen = getHeadLen(ToMI);
NextUseDistance Dst = getShortestPath(FromMBB, ToMBB);
assert(Dst.isReachable() &&
"calcShortestDistance called for instructions in non-reachable"
" basic blocks!");
NextUseDistance RV = FromTailLen + Dst + ToHeadLen;
assert(RV >= 0 && "unexpected negative distance");
return RV;
}
// Calculate the shortest unweighted path from MachineInstruction 'FromMI' to
// 'ToMI'. In contrast with 'calcShortestDistance', distances are based solely
// on basic block instruction counts and traversing a loop exit does not
// affect the value.
NextUseDistance
calcShortestUnweightedDistance(const MachineInstr *FromMI,
const MachineInstr *ToMI) const {
const MachineBasicBlock *FromMBB = FromMI->getParent();
const MachineBasicBlock *ToMBB = ToMI->getParent();
if (FromMBB == ToMBB)
return getDistance(FromMI, ToMI);
InstrIdTy FromTailLen = getTailLen(FromMI);
InstrIdTy ToHeadLen = getHeadLen(ToMI);
NextUseDistance Dst = getShortestUnweightedPath(FromMBB, ToMBB);
assert(Dst.isReachable() &&
"calcShortestUnweightedDistance called for instructions in"
" non-reachable basic blocks!");
return FromTailLen + Dst + ToHeadLen;
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// calcDistanceToUse* - various flavors of calculating the distance from an
// instruction 'CurMI' to the use of a live [sub]register.
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
// Return the distance from 'CurMI' to a live [sub]register use ('UseMI').
//
// Cfg flags controlling behavior:
// PreciseUseModeling — rewrite PHI uses to their incoming edge block;
// also selects unweighted cross-block distance
// PromoteToPreheader — route loop-entry / inner-loop uses to the preheader
CacheableNextUseDistance
calcDistanceToUse(Register LiveReg, LaneBitmask LiveLaneMask,
const MachineInstr &CurMI,
const MachineOperand *UseMO) const {
const MachineInstr *UseMI = UseMO->getParent();
const MachineBasicBlock *CurMBB = CurMI.getParent();
const MachineBasicBlock *UseMBB = UseMI->getParent();
MachineLoop *CurLoop = MLI->getLoopFor(CurMBB);
MachineLoop *UseLoop = MLI->getLoopFor(UseMBB);
if (Cfg.PreciseUseModeling) {
// Map PHI use to the end of its incoming edge block.
if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(UseMI, UseMO)) {
UseMI = &PhiUseEdge->back();
UseMBB = PhiUseEdge;
UseLoop = MLI->getLoopFor(PhiUseEdge);
}
}
enum class LoopConfig {
NoCur,
Same,
CurContainsUse,
UseContainsCur,
Siblings,
Unrelated
};
auto [LpCfg, PreHdr, CommonParent] = [&]()
-> std::tuple<LoopConfig, const MachineBasicBlock *, MachineLoop *> {
if (!CurLoop) {
return {LoopConfig::NoCur, getOutermostPreheader(UseLoop), nullptr};
}
if (CurLoop->contains(UseLoop)) {
return {CurMBB == UseMBB ? LoopConfig::Same
: LoopConfig::CurContainsUse,
findChildPreheader(CurLoop, UseLoop), nullptr};
}
if (MachineLoop *P = findCommonParent(UseLoop, CurLoop).first) {
if (P != UseLoop)
return {LoopConfig::Siblings, findChildPreheader(P, UseLoop), P};
return {LoopConfig::UseContainsCur, nullptr, nullptr};
}
return {LoopConfig::Unrelated, getOutermostPreheader(UseLoop), nullptr};
}();
//--------------------------------------------------------------------------
// Don't PromoteToPreheader
//--------------------------------------------------------------------------
if (!Cfg.PromoteToPreheader) {
switch (LpCfg) {
case LoopConfig::NoCur:
case LoopConfig::Same:
case LoopConfig::CurContainsUse:
return {InstrRelative, calcShortestDistance(&CurMI, UseMI)};
case LoopConfig::UseContainsCur: {
if (isIncomingValFromBackedge(LiveReg, LiveLaneMask, &CurMI, UseMI)) {
return calcDistanceViaEnclosingBackedge(&CurMI, CurMBB, CurLoop,
UseMI, UseMBB, UseLoop);
}
return {InstrInvariant, calcDistanceThroughSubLoopToUseMI(
CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
}
case LoopConfig::Siblings:
case LoopConfig::Unrelated:
return {InstrInvariant, calcDistanceThroughLoopToOutsideLoopUseMI(
CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
}
llvm_unreachable("unexpected loop configuration!");
}
//--------------------------------------------------------------------------
// PromoteToPreheader
//--------------------------------------------------------------------------
if (PreHdr) {
UseMI = &PreHdr->back();
UseMBB = PreHdr;
UseLoop = CommonParent;
}
switch (LpCfg) {
case LoopConfig::NoCur:
return {InstrRelative, calcShortestUnweightedDistance(&CurMI, UseMI) -
(sizeOf(*UseMI) ? 0 : 1)};
case LoopConfig::Same:
case LoopConfig::CurContainsUse:
if (CurMBB == UseMBB && !instrsAreInOrder(&CurMI, UseMI))
return {InstrRelative,
calcBackedgeDistance(&CurMI, CurMBB, CurLoop, UseMI)};
return {InstrRelative, calcShortestUnweightedDistance(&CurMI, UseMI)};
case LoopConfig::UseContainsCur:
case LoopConfig::Siblings:
return {InstrInvariant, calcDistanceThroughSubLoopToUseMI(
CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
case LoopConfig::Unrelated:
return {InstrInvariant, calcDistanceThroughLoopToOutsideLoopUseMI(
CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
}
llvm_unreachable("unexpected loop configuration!");
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// getUses helpers (compute mode)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
// Returns true if Use is reachable from MI. Handles backedges and intervening
// defs.
bool isUseReachablePrecise(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineOperand *UseMO,
const MachineInstr *UseMI,
const MachineBasicBlock *UseMBB) const {
// Filter out uses that are clearly unreachable
if (MBB != UseMBB && !isReachable(MBB, UseMBB))
return false;
// PHI uses are considered part of the incoming BB. Check for reachability
// at the edge.
if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(UseMI, UseMO)) {
if (!isReachableOrSame(MBB, PhiUseEdge))
return false;
}
// Filter out uses with an intermediate def.
const MachineInstr *DefMI = MRI->getUniqueVRegDef(UseMO->getReg());
const MachineBasicBlock *DefMBB = DefMI->getParent();
if (MBB == UseMBB) {
if (UseMI->isPHI() && MBB == DefMBB)
return true;
if (instrsAreInOrder(&MI, UseMI))
return true;
// A Def in the loop means that the value at MI will not survive through
// to this use.
MachineLoop *UseLoop = MLI->getLoopFor(UseMBB);
return UseLoop && !UseLoop->contains(DefMBB);
}
if (MBB == DefMBB)
return instrsAreInOrder(DefMI, &MI);
MachineLoop *Loop = MLI->getLoopFor(MBB);
if (!Loop)
return true;
MachineLoop *TopLoop = Loop->getOutermostLoop();
return !TopLoop->contains(DefMBB) || !isReachable(MBB, DefMBB) ||
!isForwardReachable(UseMBB, MBB);
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Debug/Developer Helpers
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
/// Goes over all MBB pairs in \p MF, calculates the shortest path between
/// them.
void populatePathTable() {
for (const MachineBasicBlock &MBB1 : *MF) {
for (const MachineBasicBlock &MBB2 : *MF) {
if (&MBB1 == &MBB2)
continue;
getShortestPath(&MBB1, &MBB2);
}
}
}
void printPaths(raw_ostream &OS) const {
OS << "\n---------------- Paths --------------- {\n";
for (const auto &[P, PI] : Paths) {
OS << " " << printMBBReference(*P.src()) << " -> "
<< printMBBReference(*P.dst()) << ": ";
PI.print(OS);
OS << '\n';
}
OS << "}\n";
}
LLVM_DUMP_METHOD void dumpPaths() const { printPaths(dbgs()); }
// Legacy alias kept for existing call sites.
void dumpShortestPaths() const {
for (const auto &P : Paths) {
const MachineBasicBlock *From = P.first.src();
const MachineBasicBlock *To = P.first.dst();
std::optional<NextUseDistance> Dist = P.second.ShortestDistance;
dbgs() << "From: " << printMBBReference(*From)
<< "-> To:" << printMBBReference(*To) << " = "
<< Dist.value_or(-1).fmt() << "\n";
}
}
void printInterBlockDistances(raw_ostream &OS) const {
using MBBPair = std::pair<unsigned, unsigned>;
using Elem = std::pair<NextUseDistance, MBBPair>;
std::vector<Elem> SortedDistances;
for (const auto &[FromNum, Dsts] : InterBlockDistances) {
for (const auto &[ToNum, Dist] : Dsts) {
SortedDistances.emplace_back(Dist.Weighted, MBBPair(FromNum, ToNum));
}
}
llvm::sort(SortedDistances, [](const auto &A, const auto &B) {
if (A.first != B.first)
return A.first < B.first;
if (A.second.first != B.second.first)
return A.second.first < B.second.first;
return A.second.second < B.second.second;
});
OS << "\n--------- InterBlockDistances -------- {\n";
for (const Elem &E : SortedDistances) {
OS << " bb." << E.second.first << " -> bb." << E.second.second << ": ";
E.first.print(OS);
OS << '\n';
}
OS << "}\n";
}
LLVM_DUMP_METHOD void dumpInterBlockDistances() const {
printInterBlockDistances(dbgs());
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// LiveRegUse Caching - A cache of the distances for the last
// MachineInstruction. When getting the distances for a MachineInstruction, if
// it is the same basic block as the cached instruction, we can generally use
// an offset from the cached values to compute the distances. There are some
// exceptions - see 'cacheLiveRegUse'.
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
struct LiveRegToUseMapElem {
LiveRegUse Use;
bool MIDependent;
LiveRegToUseMapElem() : Use(), MIDependent(false) {}
LiveRegToUseMapElem(LiveRegUse U, bool MIDep)
: Use(U), MIDependent(MIDep) {}
void print(raw_ostream &OS) const {
Use.print(OS);
OS << (MIDependent ? " [mi-dep]" : " [mi-indep]");
}
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << '\n';
}
};
// Using std::map because LaneBitmask does not work out-of-the-box as a
// DenseMap key and I did not see a performance benefit over std::map.
using LaneBitmaskToUseMap = std::map<LaneBitmask, LiveRegToUseMapElem>;
using LiveRegToUseMap = DenseMap<Register, LaneBitmaskToUseMap>;
const MachineInstr *CachedDistancesMI = nullptr;
LiveRegToUseMap CachedDistances;
LiveRegToUseMap PendingCachedDistances;
unsigned DistanceCacheHits = 0;
unsigned DistanceCacheMisses = 0;
void resetDistanceCache() {
CachedDistancesMI = nullptr;
CachedDistances.clear();
DistanceCacheHits = 0;
DistanceCacheMisses = 0;
}
void maybeClearCachedLiveRegUses(const MachineInstr &MI) {
if (CachedDistancesMI &&
(CachedDistancesMI->getParent() != MI.getParent() ||
!instrsAreInOrder(CachedDistancesMI, &MI))) {
CachedDistancesMI = nullptr;
CachedDistances.clear();
}
}
bool okToUseCacheElem(const LiveRegToUseMapElem &CacheElem,
const MachineInstr &MI, const InstrIdTy LastDelta) {
if (!CacheElem.MIDependent)
return true;
const LiveRegUse &U = CacheElem.Use;
// Never okay to produce a negative distance
if (U.Dist < LastDelta)
return false;
const MachineInstr *UseMI = U.Use->getParent();
// Always okay if use is in another basic block or UseMI is MI
if (UseMI->getParent() != MI.getParent() || UseMI == &MI)
return true;
// If CachedDistancesMI <= Use < MI we could have a problem since we don't
// know if Use is still reachable.
return !instrsAreInOrder(CachedDistancesMI, UseMI) ||
!instrsAreInOrder(UseMI, &MI);
}
std::pair<const LaneBitmaskToUseMap *, const LiveRegToUseMapElem *>
findCachedLiveRegUse(Register Reg, LaneBitmask LaneMask,
const MachineInstr &MI, const InstrIdTy LastDelta) {
if (!DistanceCacheEnabled)
return {nullptr, nullptr};
++DistanceCacheMisses; // Assume miss
auto I = CachedDistances.find(Reg);
if (I == CachedDistances.end())
return {nullptr, nullptr};
const LaneBitmaskToUseMap &RegSlot = I->second;
if (RegSlot.empty())
return {nullptr, nullptr};
auto J = RegSlot.find(LaneMask);
if (J == RegSlot.end())
return {nullptr, nullptr};
const LiveRegToUseMapElem &MaskSlot = J->second;
if (!okToUseCacheElem(MaskSlot, MI, LastDelta))
return {nullptr, nullptr};
--DistanceCacheMisses;
++DistanceCacheHits;
return {&RegSlot, &MaskSlot};
}
void cacheLiveRegUse(const MachineInstr &MI, Register Reg, LaneBitmask Mask,
LiveRegUse U, bool MIDependent) {
if (!DistanceCacheEnabled)
return;
auto I = PendingCachedDistances.try_emplace(Reg).first;
LaneBitmaskToUseMap &RegSlot = I->second;
RegSlot.try_emplace(Mask, U, MIDependent);
}
void updateCachedLiveRegUses(const MachineInstr &MI) {
if (!DistanceCacheEnabled)
return;
CachedDistancesMI = &MI;
CachedDistances = std::move(PendingCachedDistances);
PendingCachedDistances.clear();
LLVM_DEBUG(dumpDistanceCache());
}
void printDistanceCache(raw_ostream &OS) const {
OS << "\n----------- Distance Cache ----------- {\n";
OS << " CachedAt: ";
if (CachedDistancesMI)
OS << *CachedDistancesMI;
else
OS << "<none>\n";
constexpr size_t RegNameWidth = 20;
for (const auto &[Reg, ByMask] : CachedDistances) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
LaneBitmask AllLanes = MRI->getMaxLaneMaskForVReg(Reg);
for (const auto &[Mask, Elem] : ByMask) {
std::string RegName;
raw_string_ostream KOS(RegName);
if (Mask == AllLanes) {
KOS << printReg(Reg);
} else {
SmallVector<unsigned> Indexes;
TRI->getCoveringSubRegIndexes(RC, Mask, Indexes);
if (Indexes.size() == 1)
KOS << printReg(Reg, TRI, Indexes.front(), MRI);
else
KOS << printReg(Reg) << " mask=" << Mask.getAsInteger();
}
OS << " " << left_justify(RegName, RegNameWidth) << " : ";
Elem.print(OS);
OS << '\n';
}
}
OS << " (hits=" << DistanceCacheHits << " misses=" << DistanceCacheMisses
<< ")\n";
OS << "}\n";
}
LLVM_DUMP_METHOD void dumpDistanceCache() const {
printDistanceCache(dbgs());
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Processing Live Reg Uses
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
// Decompose each use in 'Uses' by sub-reg and store the nearest one in
// 'UseByMask'. Ignores subregs matching 'LiveRegLaneMask' - these are handled
// as registers, not sub-regs.
DenseMap<const TargetRegisterClass *, SmallVector<unsigned>>
SubRegIndexesForRegClass;
void collectSubRegUsesByMask(
const SmallVectorImpl<const MachineOperand *> &Uses,
const SmallVectorImpl<CacheableNextUseDistance> &Distances,
LaneBitmask LiveRegLaneMask, LaneBitmaskToUseMap &UseByMask) {
assert(Uses.size());
assert(Uses.size() == Distances.size());
const TargetRegisterClass *RC = MRI->getRegClass(Uses.front()->getReg());
auto [SRI, Inserted] = SubRegIndexesForRegClass.try_emplace(RC);
if (Inserted)
TRI->getCoveringSubRegIndexes(RC, LaneBitmask::getAll(), SRI->second);
const SmallVector<unsigned> &RCSubRegIndexes = SRI->second;
unsigned OneIndex; // Backing store for 'Indexes' below when 1 index
for (size_t I = 0; I < Uses.size(); ++I) {
const MachineOperand *MO = Uses[I];
auto [SubRegMIDep, Dist] = Distances[I];
const LiveRegUse LRU{MO, Dist};
ArrayRef<unsigned> Indexes;
if (MO->getSubReg()) {
OneIndex = MO->getSubReg();
Indexes = ArrayRef(OneIndex);
} else {
Indexes = RCSubRegIndexes;
}
for (unsigned Idx : Indexes) {
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(Idx);
if (Mask.all() || Mask == LiveRegLaneMask)
continue;
auto &[SlotU, SlotMIDep] = UseByMask[Mask];
if (updateClosest(SlotU, LRU))
SlotMIDep = SubRegMIDep;
}
}
}
// Similar to 'collectSubRegUsesByMask' above, but uses cached distances.
void collectSubRegUsesByMaskFromCache(const LaneBitmaskToUseMap &CachedMap,
LaneBitmask LiveRegLaneMask,
const MachineInstr *MI,
InstrIdTy LastDelta,
LaneBitmaskToUseMap &UseByMask) {
for (const auto &KV : CachedMap) {
LaneBitmask SubregLaneMask = KV.first;
if (SubregLaneMask.all() || SubregLaneMask == LiveRegLaneMask)
continue;
const LiveRegToUseMapElem &SubregE = KV.second;
if (!okToUseCacheElem(SubregE, *MI, LastDelta))
continue;
const bool MIDep = SubregE.MIDependent;
LiveRegUse U = SubregE.Use;
if (MIDep)
U.Dist -= LastDelta;
auto &[SlotU, SlotMIDep] = UseByMask[SubregLaneMask];
if (updateClosest(SlotU, U))
SlotMIDep = MIDep;
}
}
// Loops through 'UseByMask' finding the furthest sub-register and updating
// 'FurthestSubreg' accordingly.
void updateFurthestSubReg(
const MachineInstr &MI, const LiveRegUse &U,
const LaneBitmaskToUseMap &UseByMask,
DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses,
LiveRegUse &FurthestSubreg) {
if (UseByMask.empty()) {
updateFurthest(FurthestSubreg, U);
return;
}
for (const auto &KV : UseByMask) {
const LiveRegUse &SubregU = KV.second.Use;
const bool SubregMIDep = KV.second.MIDependent;
if (RelevantUses)
RelevantUses->try_emplace(SubregU.Use, SubregU);
cacheLiveRegUse(MI, SubregU.Use->getReg(), KV.first, SubregU,
SubregMIDep);
updateFurthest(FurthestSubreg, SubregU);
}
}
// Used to populate 'MIDefs' to be passed to 'getNextUseDistances'.
SmallSet<Register, 4> collectDefinedRegisters(const MachineInstr &MI) const {
SmallSet<Register, 4> MIDefs;
for (const MachineOperand &MO : MI.all_defs()) {
if (MO.isReg() && MO.getReg().isValid() && hasAtLeastOneUse(MO.getReg()))
MIDefs.insert(MO.getReg());
}
return MIDefs;
}
// Computes distances from 'MI' to each registers in 'LiveRegs'. Returns the
// furthest register and (optionally) sub-register in 'Furthest' and
// 'FurthestSubreg' respectively.
public:
void getNextUseDistances(const GCNRPTracker::LiveRegSet &LiveRegs,
const MachineInstr &MI, LiveRegUse &Furthest,
LiveRegUse *FurthestSubreg = nullptr,
DenseMap<const MachineOperand *, UseDistancePair>
*RelevantUses = nullptr) {
const SmallSet<Register, 4> MIDefs(collectDefinedRegisters(MI));
SmallVector<const MachineOperand *> Uses;
SmallVector<CacheableNextUseDistance> Distances;
LaneBitmaskToUseMap UseByMask;
maybeClearCachedLiveRegUses(MI);
const InstrIdTy LastDelta =
CachedDistancesMI ? getDistance(CachedDistancesMI, &MI) : 0;
for (auto &KV : LiveRegs) {
const Register Reg = KV.first;
const LaneBitmask LaneMask = KV.second;
if (MIDefs.contains(Reg))
continue;
Uses.clear();
UseByMask.clear();
LiveRegUse U;
bool MIDependent = false;
auto [CacheMap, CacheElem] =
findCachedLiveRegUse(Reg, LaneMask, MI, LastDelta);
if (CacheMap && CacheElem) {
MIDependent = CacheElem->MIDependent;
U = CacheElem->Use;
if (MIDependent)
U.Dist -= LastDelta;
} else {
getReachableUses(Reg, LaneMask, MI, Uses);
if (Uses.empty())
continue;
const MachineOperand *NextUse = nullptr;
NextUseDistance Dist = getShortestDistance(
Reg, LaneMask, MI, Uses, &NextUse, &MIDependent, &Distances);
U = LiveRegUse{NextUse, Dist};
}
if (RelevantUses)
RelevantUses->try_emplace(U.Use, U);
cacheLiveRegUse(MI, Reg, LaneMask, U, MIDependent);
updateFurthest(Furthest, U);
if (!FurthestSubreg)
continue;
if (CacheMap) {
collectSubRegUsesByMaskFromCache(*CacheMap, LaneMask, &MI, LastDelta,
UseByMask);
} else {
collectSubRegUsesByMask(Uses, Distances, LaneMask, UseByMask);
}
updateFurthestSubReg(MI, U, UseByMask, RelevantUses, *FurthestSubreg);
}
updateCachedLiveRegUses(MI);
}
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Helper methods for printAsJson
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private:
static format_object<unsigned> Fmt(unsigned Id) { return format("%u", Id); }
public:
void printVerboseInstrFields(json::OStream &J, const MachineInstr &MI) const {
J.attribute("id", getInstrId(&MI));
J.attribute("head-len", getHeadLen(&MI));
J.attribute("tail-len", getTailLen(&MI));
}
void printPaths(json::OStream &J, ModuleSlotTracker &MST) const {
J.attributeBegin("paths");
J.arrayBegin();
for (const auto &KV : Paths) {
const Path &P = KV.first;
const PathInfo &PI = KV.second;
J.objectBegin();
printMBBNameAttr(J, "src", *P.src(), MST);
printMBBNameAttr(J, "dst", *P.dst(), MST);
if (PI.ShortestDistance.has_value()) {
J.attribute("shortest-distance",
PI.ShortestDistance.value().toJsonValue());
} else {
J.attribute("shortest-distance", nullptr);
}
if (PI.ShortestUnweightedDistance.has_value()) {
J.attribute("shortest-unweighted-distance",
PI.ShortestUnweightedDistance.value().toJsonValue());
} else {
J.attribute("shortest-unweighted-distance", nullptr);
}
J.attribute("edge-kind", static_cast<int>(PI.EK));
J.attribute("reachable", PI.Reachable);
J.attribute("forward-reachable", PI.ForwardReachable);
J.objectEnd();
}
J.arrayEnd();
J.attributeEnd();
}
public:
AMDGPUNextUseAnalysisImpl(const MachineFunction *, const MachineLoopInfo *);
~AMDGPUNextUseAnalysisImpl() { clearTables(); }
AMDGPUNextUseAnalysis::Config getConfig() const { return Cfg; }
void setConfig(AMDGPUNextUseAnalysis::Config NewCfg) {
Cfg = NewCfg;
clearTables();
initializeTables();
}
unsigned getDistanceCacheHits() const { return DistanceCacheHits; }
unsigned getDistanceCacheMisses() const { return DistanceCacheMisses; }
void getReachableUses(Register LiveReg, LaneBitmask LaneMask,
const MachineInstr &MI,
SmallVector<const MachineOperand *> &Uses) const;
/// \Returns the shortest next-use distance for \p LiveReg.
NextUseDistance
getShortestDistance(Register LiveReg, LaneBitmask LaneMask,
const MachineInstr &FromMI,
const SmallVector<const MachineOperand *> &Uses,
const MachineOperand **ShortestUseOut, bool *MIDependent,
SmallVector<CacheableNextUseDistance> *Distances) const;
NextUseDistance
getShortestDistance(Register LiveReg, const MachineInstr &FromMI,
const SmallVector<const MachineOperand *> &Uses) const {
return getShortestDistance(LiveReg, LaneBitmask::getAll(), FromMI, Uses,
nullptr, nullptr, nullptr);
}
};
AMDGPUNextUseAnalysisImpl::AMDGPUNextUseAnalysisImpl(
const MachineFunction *MF, const MachineLoopInfo *ML) {
this->MF = MF;
this->MLI = ML;
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF->getRegInfo();
// FIXME: Hopefully we will soon converge on a single way of calculating
// next-use distance and remove these presets.
if (ConfigPresetOpt == "compute")
Cfg = AMDGPUNextUseAnalysis::Config::Compute();
else
Cfg = AMDGPUNextUseAnalysis::Config::Graphics();
if (ConfigCountPhisOpt.getNumOccurrences())
Cfg.CountPhis = ConfigCountPhisOpt;
if (ConfigForwardOnlyOpt.getNumOccurrences())
Cfg.ForwardOnly = ConfigForwardOnlyOpt;
if (ConfigPreciseUseModelingOpt.getNumOccurrences())
Cfg.PreciseUseModeling = ConfigPreciseUseModelingOpt;
if (ConfigPromoteToPreheaderOpt.getNumOccurrences())
Cfg.PromoteToPreheader = ConfigPromoteToPreheaderOpt;
initializeTables();
}
NextUseDistance AMDGPUNextUseAnalysisImpl::getShortestDistance(
Register LiveReg, LaneBitmask LaneMask, const MachineInstr &CurMI,
const SmallVector<const MachineOperand *> &Uses,
const MachineOperand **ShortestUseOut, bool *CurMIDependentOut,
SmallVector<CacheableNextUseDistance> *Distances) const {
assert(!LiveReg.isPhysical() && !TRI->isAGPR(*MRI, LiveReg) &&
"Next-use distance is calculated for SGPRs and VGPRs");
const MachineOperand *NextUse = nullptr;
auto NextUseDist = NextUseDistance::unreachable();
bool CurMIDependent = false;
if (Distances) {
Distances->clear();
Distances->reserve(Uses.size());
}
for (auto *UseMO : Uses) {
auto [Dep, D] = calcDistanceToUse(LiveReg, LaneMask, CurMI, UseMO);
if (D < NextUseDist) {
NextUseDist = D;
NextUse = UseMO;
CurMIDependent = Dep;
}
if (Distances)
Distances->push_back({Dep, D});
}
if (ShortestUseOut)
*ShortestUseOut = NextUse;
if (CurMIDependentOut)
*CurMIDependentOut = CurMIDependent;
assert(NextUseDist.isReachable() &&
"getShortestDistance called with no reachable uses");
return NextUseDist;
}
void AMDGPUNextUseAnalysisImpl::getReachableUses(
Register Reg, LaneBitmask LaneMask, const MachineInstr &MI,
SmallVector<const MachineOperand *> &Uses) const {
const bool CheckMask = LaneMask != LaneBitmask::getAll() &&
LaneMask != MRI->getMaxLaneMaskForVReg(Reg);
const MachineBasicBlock *MBB = MI.getParent();
for (const MachineOperand *UseMO : getRegisterUses(Reg)) {
const MachineInstr *UseMI = UseMO->getParent();
const MachineBasicBlock *UseMBB = UseMI->getParent();
if (CheckMask && !machineOperandCoveredBy(*UseMO, LaneMask))
continue;
bool Reachable;
if (Cfg.PreciseUseModeling)
Reachable = isUseReachablePrecise(MI, MBB, UseMO, UseMI, UseMBB);
else if (MBB == UseMBB)
Reachable = instrsAreInOrder(&MI, UseMI);
else
Reachable = isForwardReachable(MBB, UseMBB);
if (Reachable)
Uses.push_back(UseMO);
}
}
//==============================================================================
// AMDGPUNextUseAnalysis
//==============================================================================
AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(const MachineFunction *MF,
const MachineLoopInfo *MLI) {
Impl = std::make_unique<AMDGPUNextUseAnalysisImpl>(MF, MLI);
}
AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other)
: Impl(std::move(Other.Impl)) {}
AMDGPUNextUseAnalysis::~AMDGPUNextUseAnalysis() {}
AMDGPUNextUseAnalysis &
AMDGPUNextUseAnalysis::operator=(AMDGPUNextUseAnalysis &&Other) {
if (this != &Other)
Impl = std::move(Other.Impl);
return *this;
}
AMDGPUNextUseAnalysis::Config AMDGPUNextUseAnalysis::getConfig() const {
return Impl->getConfig();
}
void AMDGPUNextUseAnalysis::setConfig(Config Cfg) { Impl->setConfig(Cfg); }
/// \Returns the next-use distance for \p LiveReg.
NextUseDistance AMDGPUNextUseAnalysis::getShortestDistance(
Register LiveReg, const MachineInstr &FromMI,
const SmallVector<const MachineOperand *> &Uses,
const MachineOperand **ShortestUseOut,
SmallVector<NextUseDistance> *DistancesOut) const {
SmallVector<AMDGPUNextUseAnalysisImpl::CacheableNextUseDistance> Distances;
auto Dist = Impl->getShortestDistance(LiveReg, LaneBitmask::getAll(), FromMI,
Uses, ShortestUseOut, nullptr,
DistancesOut ? &Distances : nullptr);
if (DistancesOut) {
for (auto [MIDep, D] : Distances)
DistancesOut->push_back(D);
}
return Dist;
}
void AMDGPUNextUseAnalysis::getNextUseDistances(
const DenseMap<unsigned, LaneBitmask> &LiveRegs, const MachineInstr &MI,
UseDistancePair &FurthestOut, UseDistancePair *FurthestSubregOut,
DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses) const {
LiveRegUse Furthest;
LiveRegUse FurthestSubreg;
Impl->getNextUseDistances(LiveRegs, MI, Furthest,
FurthestSubregOut ? &FurthestSubreg : nullptr,
RelevantUses);
FurthestOut = Furthest;
if (FurthestSubregOut)
*FurthestSubregOut = FurthestSubreg;
}
void AMDGPUNextUseAnalysis::getReachableUses(
Register LiveReg, LaneBitmask LaneMask, const MachineInstr &MI,
SmallVector<const MachineOperand *> &Uses) const {
return Impl->getReachableUses(LiveReg, LaneMask, MI, Uses);
}
//==============================================================================
// AMDGPUNextUseAnalysisLegacyPass
//==============================================================================
//------------------------------------------------------------------------------
// Legacy Analysis Pass
//------------------------------------------------------------------------------
AMDGPUNextUseAnalysisLegacyPass::AMDGPUNextUseAnalysisLegacyPass()
: MachineFunctionPass(ID) {}
StringRef AMDGPUNextUseAnalysisLegacyPass::getPassName() const {
return "Next Use Analysis";
}
bool AMDGPUNextUseAnalysisLegacyPass::runOnMachineFunction(
MachineFunction &MF) {
const MachineLoopInfo *MLI =
&getAnalysis<MachineLoopInfoWrapperPass>().getLI();
NUA.reset(new AMDGPUNextUseAnalysis(&MF, MLI));
return false;
}
void AMDGPUNextUseAnalysisLegacyPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfoWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
char AMDGPUNextUseAnalysisLegacyPass::ID = 0;
char &llvm::AMDGPUNextUseAnalysisLegacyID = AMDGPUNextUseAnalysisLegacyPass::ID;
INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisLegacyPass, DEBUG_TYPE,
"Next Use Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUNextUseAnalysisLegacyPass, DEBUG_TYPE,
"Next Use Analysis", false, true)
FunctionPass *llvm::createAMDGPUNextUseAnalysisLegacyPass() {
return new AMDGPUNextUseAnalysisLegacyPass();
}
//------------------------------------------------------------------------------
// New Pass Manager Analysis Pass
//------------------------------------------------------------------------------
AnalysisKey AMDGPUNextUseAnalysisPass::Key;
AMDGPUNextUseAnalysisPass::Result
AMDGPUNextUseAnalysisPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
const MachineLoopInfo &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
return AMDGPUNextUseAnalysis(&MF, &MLI);
}
//==============================================================================
// AMDGPUNextUseAnalysisPrinterLegacyPass
//==============================================================================
namespace {
void printInstrMember(json::OStream &J, ModuleSlotTracker &MST,
const MachineInstr &MI,
const AMDGPUNextUseAnalysisImpl &NUA) {
printStringAttr(J, "instr", MI, MST);
if (DumpNextUseDistanceVerbose)
NUA.printVerboseInstrFields(J, MI);
}
void printDistances(
json::OStream &J, const MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
ModuleSlotTracker &MST,
const DenseMap<const MachineOperand *, UseDistancePair> &Uses) {
if (!DumpNextUseDistanceVerbose)
return;
// Sorting isn't necessary for the purposes of JSON, but it reduces
// FileCheck differences.
SmallVector<const MachineOperand *> Keys;
for (const MachineOperand *K : Uses.keys())
Keys.push_back(K);
llvm::sort(Keys, [](const auto &A, const auto &B) {
return A->getReg() < B->getReg() ||
(A->getReg() == B->getReg() && A->getSubReg() < B->getSubReg());
});
J.attributeBegin("distances");
J.objectBegin();
for (const MachineOperand *K : Keys) {
const LiveRegUse U = Uses.at(K);
printAttr(J, printReg(U.getReg(), &TRI, U.getSubReg(), &MRI),
U.Dist.toJsonValue());
}
J.objectEnd();
J.attributeEnd();
}
void printFurthestUse(json::OStream &J, const MachineRegisterInfo &MRI,
const SIRegisterInfo &TRI, ModuleSlotTracker &MST,
const LiveRegUse F, bool Subreg = false) {
J.attributeBegin(Subreg ? "furthest-subreg" : "furthest");
J.objectBegin();
if (F.Use) {
printStringAttr(
J, "register",
printReg(F.getReg(), &TRI, Subreg ? F.getSubReg() : 0, &MRI));
if (DumpNextUseDistanceVerbose) {
printStringAttr(J, "use", [&](raw_ostream &OS) { OS << (*F.Use); });
printStringAttr(J, "use-mi", *F.Use->getParent(), MST);
}
J.attribute("distance", F.Dist.toJsonValue());
}
J.objectEnd();
J.attributeEnd();
}
void printDistanceFromDefToUse(json::OStream &J, const MachineFunction &MF,
const AMDGPUNextUseAnalysis &NUA,
const SIRegisterInfo &TRI,
const MachineRegisterInfo &MRI) {
auto getRegNextUseDistance = [&](Register DefReg) {
const MachineInstr &DefMI = *MRI.def_instr_begin(DefReg);
SmallVector<const MachineOperand *> Uses;
NUA.getReachableUses(DefReg, LaneBitmask::getAll(), DefMI, Uses);
if (Uses.empty())
return NextUseDistance::unreachable();
return NUA.getShortestDistance(DefReg, DefMI, Uses);
};
J.attributeBegin("distance-from-def-to-closest-use");
J.objectBegin();
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg.isPhysical())
continue;
NextUseDistance D = getRegNextUseDistance(Reg);
printAttr(J, printReg(Reg, &TRI, 0, &MRI), D.toJsonValue());
}
}
}
J.objectEnd();
J.attributeEnd();
}
void printNextUseDistancesAsJson(json::OStream &J, const MachineFunction &MF,
const AMDGPUNextUseAnalysis &NUA,
const AMDGPUNextUseAnalysisImpl &NUAImpl,
const LiveIntervals &LIS) {
using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
const Function &F = MF.getFunction();
const Module *M = F.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
// We don't actually care about register pressure here - just using
// GCNDownwardRPTracker as a convenient way of getting the set of live
// registers at a given instruction.
GCNDownwardRPTracker RPTracker(LIS);
ModuleSlotTracker MST(M);
MST.incorporateFunction(F);
DenseMap<const MachineOperand *, UseDistancePair> RelevantUses;
J.attributeBegin("furthest-distances");
J.objectBegin();
for (const MachineBasicBlock &MBB : MF) {
std::string BBName;
raw_string_ostream BBOS(BBName);
MBB.printName(BBOS, MachineBasicBlock::PrintNameIr, &MST);
J.attributeBegin(BBOS.str());
J.arrayBegin();
const MachineInstr *PrevMI = nullptr;
for (const MachineInstr &MI : MBB) {
// Update register pressure tracker
if (!PrevMI || PrevMI->getOpcode() == AMDGPU::PHI)
RPTracker.reset(MI);
RPTracker.advance();
UseDistancePair Furthest;
UseDistancePair FurthestSubreg;
RelevantUses.clear();
NUA.getNextUseDistances(RPTracker.getLiveRegs(), MI, Furthest,
&FurthestSubreg, &RelevantUses);
J.objectBegin();
printInstrMember(J, MST, MI, NUAImpl);
printDistances(J, MRI, TRI, MST, RelevantUses);
printFurthestUse(J, MRI, TRI, MST, Furthest);
printFurthestUse(J, MRI, TRI, MST, FurthestSubreg, /*Subreg*/ true);
J.objectEnd();
PrevMI = &MI;
}
J.arrayEnd();
J.attributeEnd();
}
J.objectEnd();
J.attributeEnd();
if (DumpNextUseDistanceVerbose || DumpNextUseDistanceDefToUse)
printDistanceFromDefToUse(J, MF, NUA, TRI, MRI);
if (DumpNextUseDistanceVerbose)
NUAImpl.printPaths(J, MST);
if (DistanceCacheEnabled) {
J.attributeBegin("metrics");
J.objectBegin();
{
J.attributeBegin("distance-cache");
J.objectBegin();
{
J.attribute("hits", NUAImpl.getDistanceCacheHits());
J.attribute("misses", NUAImpl.getDistanceCacheMisses());
}
J.objectEnd();
J.attributeEnd(); // distance-cache
}
J.objectEnd();
J.attributeEnd(); // metrics
}
}
void printAsJson(raw_ostream &FallbackOS, TimerGroup &JsonTimerGroup,
Timer &JsonTimer, const MachineFunction &MF,
const AMDGPUNextUseAnalysis &NUA,
const AMDGPUNextUseAnalysisImpl &NUAImpl,
const LiveIntervals &LIS) {
std::string FN = DumpNextUseDistanceAsJson;
auto dump = [&](raw_ostream &OS) {
json::OStream J(OS, 2);
J.objectBegin();
J.attributeBegin("next-use-analysis");
J.objectBegin();
printNextUseDistancesAsJson(J, MF, NUA, NUAImpl, LIS);
J.objectEnd();
J.attributeEnd();
JsonTimer.stopTimer();
JsonTimerGroup.printJSONValues(OS, ",\n");
J.objectEnd();
};
if (!DumpNextUseDistanceAsJson.getNumOccurrences()) {
dump(FallbackOS);
} else if (FN.empty() || FN == "-") {
dump(outs());
} else {
std::error_code EC;
ToolOutputFile OutF(FN, EC, sys::fs::OF_None);
dump(OutF.os());
OutF.keep();
}
}
} // namespace
//------------------------------------------------------------------------------
// Legacy Printer Pass
//------------------------------------------------------------------------------
AMDGPUNextUseAnalysisPrinterLegacyPass::AMDGPUNextUseAnalysisPrinterLegacyPass()
: MachineFunctionPass(ID) {}
StringRef AMDGPUNextUseAnalysisPrinterLegacyPass::getPassName() const {
return "AMDGPU Next Use Analysis Printer";
}
bool AMDGPUNextUseAnalysisPrinterLegacyPass::runOnMachineFunction(
MachineFunction &MF) {
TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
"AMDGPU Next Use Analysis JSON Printer", false);
Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
JsonTimer.startTimer();
const LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
const AMDGPUNextUseAnalysis &NUA =
getAnalysis<AMDGPUNextUseAnalysisLegacyPass>().getNextUseAnalysis();
printAsJson(errs(), JsonTimerGroup, JsonTimer, MF, NUA, *NUA.Impl, LIS);
return false;
}
void AMDGPUNextUseAnalysisPrinterLegacyPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfoWrapperPass>();
AU.addRequired<LiveIntervalsWrapperPass>();
AU.addRequired<AMDGPUNextUseAnalysisLegacyPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
char AMDGPUNextUseAnalysisPrinterLegacyPass::ID = 0;
char &AMDGPUNextUseAnalysisPrinterLegacyID =
AMDGPUNextUseAnalysisPrinterLegacyPass::ID;
INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisPrinterLegacyPass,
"amdgpu-next-use-printer",
"AMDGPU Next Use Analysis Printer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_END(AMDGPUNextUseAnalysisPrinterLegacyPass,
"amdgpu-next-use-printer",
"AMDGPU Next Use Analysis Printer", false, false)
FunctionPass *llvm::createAMDGPUNextUseAnalysisPrinterLegacyPass() {
return new AMDGPUNextUseAnalysisPrinterLegacyPass();
}
//------------------------------------------------------------------------------
// New Pass Manager Printer Pass
//------------------------------------------------------------------------------
PreservedAnalyses
AMDGPUNextUseAnalysisPrinterPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
"AMDGPU Next Use Analysis JSON Printer", false);
Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
JsonTimer.startTimer();
const LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
const AMDGPUNextUseAnalysis &NUA =
MFAM.getResult<AMDGPUNextUseAnalysisPass>(MF);
printAsJson(OS, JsonTimerGroup, JsonTimer, MF, NUA, *NUA.Impl, LIS);
return PreservedAnalyses::all();
}