Based on https://github.com/llvm/llvm-project/pull/156079 and https://github.com/llvm/llvm-project/pull/171520; see those PRs for background. Provides a configuration-preset option `--amdgpu-next-use-analysis-config` that produces results matching either PR #156079 (`compute`) or PR #171520 (`graphics`).

Co-authored-by: alex-t <atimofee@amd.com>
Co-authored-by: Konstantina Mitropoulou <KonstantinaMitropoulou@amd.com>
2615 lines · 92 KiB · C++
//===---------------------- AMDGPUNextUseAnalysis.cpp ---------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements the AMDGPUNextUseAnalysis pass, a machine-level analysis
|
|
// that computes the distance from each instruction to the "nearest" next use of
|
|
// every live virtual register. These distances guide register spilling
|
|
// decisions by identifying which live values are furthest from their next use
|
|
// and are therefore the best candidates to spill.
|
|
//
|
|
// The analysis is based on the Braun & Hack CC'09 paper "Register Spilling and
|
|
// Live-Range Splitting for SSA-Form Programs."
|
|
//
|
|
// Key concepts:
|
|
//
|
|
// NextUseDistance A loop-depth-weighted instruction count representing
|
|
// how far away a register's next use is. Distances
|
|
// through deeper loops are scaled by fromLoopDepth() so
|
|
// that uses inside hot loops appear closer.
|
|
//
|
|
// Inter-block Pre-computed shortest weighted distances between all
|
|
// distances pairs of basic blocks, used to efficiently answer
|
|
// cross-block next-use queries. Each intermediate block
|
|
// is weighted by fromLoopDepth() applied once per loop
|
|
// boundary crossing relative to the destination.
|
|
//
|
|
// Configuration flags (see Config struct in the header):
|
|
//
|
|
// CountPhis Count PHI instructions toward distance and block size.
|
|
// ForwardOnly Restrict inter-block distances to forward-reachable
|
|
// paths.
|
|
// PreciseUseModeling Model PHI uses at their incoming edge block and filter
|
|
// uses with intermediate redefinitions.
|
|
// PromoteToPreheader Route loop-entry and inner-loop uses to the preheader.
|
|
//
|
|
// This file contains:
|
|
//
|
|
// - Command-line options for configuration and debug output
|
|
// - LiveRegUse / JSON helpers
|
|
// - AMDGPUNextUseAnalysisImpl (the main analysis implementation)
|
|
// - Instruction ID assignment and block size computation
|
|
// - CFG path pre-computation (reachability, loop depth, back-edges)
|
|
// - Inter-block distance computation
|
|
// - Per-register next-use distance queries and caching
|
|
// - AMDGPUNextUseAnalysis (public facade, pimpl)
|
|
// - Legacy and new pass manager wrappers
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPUNextUseAnalysis.h"
|
|
#include "AMDGPU.h"
|
|
#include "GCNRegPressure.h"
|
|
#include "GCNSubtarget.h"
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/PostOrderIterator.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
|
#include "llvm/IR/ModuleSlotTracker.h"
|
|
#include "llvm/InitializePasses.h"
|
|
#include "llvm/Support/FileSystem.h"
|
|
#include "llvm/Support/JSON.h"
|
|
#include "llvm/Support/Timer.h"
|
|
#include "llvm/Support/ToolOutputFile.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <string>
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "amdgpu-next-use-analysis"
|
|
|
|
//==============================================================================
|
|
// Options etc
|
|
//==============================================================================
|
|
namespace {

// Enables caching of per-register next-use distance query results.
cl::opt<bool>
    DistanceCacheEnabled("amdgpu-next-use-analysis-distance-cache",
                         cl::init(true), cl::Hidden,
                         cl::desc("Enable live-reg-use distance cache"));

// When non-empty, dump the computed next-use distances as JSON to this file.
cl::opt<std::string>
    DumpNextUseDistanceAsJson("amdgpu-next-use-analysis-dump-distance-as-json",
                              cl::Hidden);

// Include def-to-use distance information in the JSON dump.
cl::opt<bool> DumpNextUseDistanceDefToUse(
    "amdgpu-next-use-analysis-dump-distance-def-to-use", cl::init(false),
    cl::Hidden);

// Emit additional detail in the JSON dump.
cl::opt<bool>
    DumpNextUseDistanceVerbose("amdgpu-next-use-analysis-dump-distance-verbose",
                               cl::init(false), cl::Hidden);

// 'graphics' and 'compute' modes arose due to initial competing implementations
// of next-use analysis that emphasized different types of workloads. This
// implementation is a compromise that combines aspects of both. Over time, the
// hope is we will be able to remove some of these differences and settle on a
// more unified implementation.
cl::opt<std::string>
    ConfigPresetOpt("amdgpu-next-use-analysis-config", cl::Hidden,
                    cl::init("graphics"),
                    cl::desc("Config preset: 'graphics' or 'compute'"));

// Per-flag overrides. NOTE(review): these presumably override the
// corresponding fields of the Config struct selected by the preset above —
// confirm against the Config handling in the header.
cl::opt<bool> ConfigCountPhisOpt(
    "amdgpu-next-use-analysis-count-phis", cl::Hidden,
    cl::desc("Count PHI instructions toward distance and block size"));
cl::opt<bool> ConfigForwardOnlyOpt(
    "amdgpu-next-use-analysis-forward-only", cl::Hidden,
    cl::desc("Restrict inter-block distances to forward-reachable paths"));
cl::opt<bool> ConfigPreciseUseModelingOpt(
    "amdgpu-next-use-analysis-precise-use-modeling", cl::Hidden,
    cl::desc("Model PHI uses via incoming edge block with loop-aware "
             "reachability filtering"));
cl::opt<bool> ConfigPromoteToPreheaderOpt(
    "amdgpu-next-use-analysis-use-preheader-model", cl::Hidden,
    cl::desc("Promote loop-entry and inner-loop uses to the loop preheader"));
} // namespace
|
|
|
|
//==============================================================================
|
|
// LiveRegUse - Represents a live register use with its distance. Used for
|
|
// tracking and sorting register uses by distance.
|
|
//==============================================================================
|
|
namespace {
|
|
using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
|
|
struct LiveRegUse : public UseDistancePair {
|
|
// 'nullptr' indicates an unset/invalid state.
|
|
LiveRegUse() : UseDistancePair(nullptr, 0) {}
|
|
LiveRegUse(const MachineOperand *Use, NextUseDistance Dist)
|
|
: UseDistancePair(Use, Dist) {}
|
|
LiveRegUse(const UseDistancePair &P) : UseDistancePair(P) {}
|
|
|
|
bool isUnset() const { return Use == nullptr; }
|
|
|
|
Register getReg() const { return Use->getReg(); }
|
|
unsigned getSubReg() const { return Use->getSubReg(); }
|
|
LaneBitmask getLaneMask(const SIRegisterInfo *TRI) const {
|
|
return TRI->getSubRegIndexLaneMask(Use->getSubReg());
|
|
}
|
|
|
|
bool isCloserThan(const LiveRegUse &X) const {
|
|
if (Dist < X.Dist)
|
|
return true;
|
|
|
|
if (Dist > X.Dist)
|
|
return false;
|
|
|
|
if (Use == X.Use)
|
|
return false;
|
|
|
|
// Ugh. When !CountPhis, PHIs and the first non-PHI instruction have id
|
|
// 0. In this case, consider PHIs as less than the first non-PHI
|
|
// instruction.
|
|
const MachineInstr *ThisMI = Use->getParent();
|
|
const MachineInstr *XMI = X.Use->getParent();
|
|
const MachineBasicBlock *ThisMBB = ThisMI->getParent();
|
|
if (ThisMBB == XMI->getParent()) {
|
|
if (ThisMI->isPHI() && !XMI->isPHI() &&
|
|
XMI == &(*ThisMBB->getFirstNonPHI()))
|
|
return true;
|
|
}
|
|
|
|
// Ensure deterministic results
|
|
return X.getReg() < getReg();
|
|
}
|
|
|
|
void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr,
|
|
const MachineRegisterInfo *MRI = nullptr) const {
|
|
if (isUnset()) {
|
|
OS << "<unset>";
|
|
return;
|
|
}
|
|
Dist.print(OS);
|
|
OS << " [" << printReg(getReg(), TRI, getSubReg(), MRI) << "]";
|
|
}
|
|
|
|
LLVM_DUMP_METHOD void dump() const {
|
|
print(dbgs());
|
|
dbgs() << '\n';
|
|
}
|
|
};
|
|
|
|
inline bool updateClosest(LiveRegUse &Closest, const LiveRegUse &X) {
|
|
if (!Closest.Use || X.isCloserThan(Closest)) {
|
|
Closest = X;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
inline bool updateFurthest(LiveRegUse &Furthest, const LiveRegUse &X) {
|
|
if (!Furthest.Use || Furthest.isCloserThan(X)) {
|
|
Furthest = X;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
} // namespace
|
|
|
|
//==============================================================================
|
|
// JSON helpers
|
|
//==============================================================================
|
|
namespace {
|
|
template <typename Lambda>
|
|
void printStringAttr(json::OStream &J, const char *Name, Lambda L) {
|
|
J.attributeBegin(Name);
|
|
raw_ostream &OS = J.rawValueBegin();
|
|
OS << '"';
|
|
L(OS);
|
|
OS << '"';
|
|
J.rawValueEnd();
|
|
J.attributeEnd();
|
|
}
|
|
void printStringAttr(json::OStream &J, const char *Name, Printable P) {
|
|
printStringAttr(J, Name, [&](raw_ostream &OS) { OS << P; });
|
|
}
|
|
|
|
void printStringAttr(json::OStream &J, const char *Name, const MachineInstr &MI,
|
|
ModuleSlotTracker &MST) {
|
|
printStringAttr(J, Name, [&](raw_ostream &OS) {
|
|
MI.print(OS, MST,
|
|
/* IsStandalone */ false,
|
|
/* SkipOpers */ false,
|
|
/* SkipDebugLoc */ false,
|
|
/* AddNewLine ---> */ false,
|
|
/* TargetInstrInfo */ nullptr);
|
|
});
|
|
}
|
|
|
|
void printMBBNameAttr(json::OStream &J, const char *Name,
|
|
const MachineBasicBlock &MBB, ModuleSlotTracker &MST) {
|
|
printStringAttr(J, Name, [&](raw_ostream &OS) {
|
|
MBB.printName(OS, MachineBasicBlock::PrintNameIr, &MST);
|
|
});
|
|
}
|
|
|
|
template <typename NameLambda, typename ValueT>
|
|
void printAttr(json::OStream &J, NameLambda NL, ValueT V) {
|
|
std::string Name;
|
|
raw_string_ostream NameOS(Name);
|
|
NL(NameOS);
|
|
J.attribute(NameOS.str(), V);
|
|
}
|
|
|
|
template <typename ValueT>
|
|
void printAttr(json::OStream &J, const Printable &P, ValueT V) {
|
|
printAttr(J, [&](raw_ostream &OS) { OS << P; }, V);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
//==============================================================================
|
|
// AMDGPUNextUseAnalysisImpl
|
|
//==============================================================================
|
|
class llvm::AMDGPUNextUseAnalysisImpl {
|
|
public:
|
|
  /// A cached next-use distance plus a flag recording how it was measured.
  /// NOTE(review): IsInstrRelative presumably distinguishes distances measured
  /// relative to a specific instruction from instruction-invariant ones —
  /// confirm against the distance-cache usage elsewhere in this file.
  struct CacheableNextUseDistance {
    bool IsInstrRelative;
    NextUseDistance Distance;
  };
  // Named constants for CacheableNextUseDistance::IsInstrRelative.
  static constexpr bool InstrRelative = true;
  static constexpr bool InstrInvariant = false;
|
|
|
private:
  // Analysis inputs; set when the analysis is (re)run on a function.
  const MachineFunction *MF = nullptr;
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  const MachineLoopInfo *MLI = nullptr;
  const MachineRegisterInfo *MRI = nullptr;

  // Per-instruction ids within their parent block; see calcInstrIds().
  using InstrIdTy = unsigned;
  using InstrToIdMap = DenseMap<const MachineInstr *, InstrIdTy>;
  InstrToIdMap InstrToId;
  // Active configuration (preset plus per-flag overrides).
  AMDGPUNextUseAnalysis::Config Cfg;
|
|
|
|
  /// Build all per-function tables: instruction ids for every block, the CFG
  /// path cache, and the all-pairs inter-block distances.
  void initializeTables() {
    for (const MachineBasicBlock &BB : *MF)
      calcInstrIds(&BB, InstrToId);
    initializeCfgPaths();
    initializeInterBlockDistances();
  }

  /// Drop all per-function state, including the distance cache.
  void clearTables() {
    InstrToId.clear();
    RegUseMap.clear();
    Paths.clear();

    resetDistanceCache();
  }
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Instruction Ids
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
unsigned sizeOf(const MachineInstr &MI) const {
|
|
// When !Cfg.CountPhis, PHIs do not contribute to distances/sizes since they
|
|
// generally don't result in the generation of a machine instruction.
|
|
// FIXME: Consider using MI.isPseudo() or maybe MI.isMetaInstruction().
|
|
return Cfg.CountPhis ? 1 : !MI.isPHI();
|
|
}
|
|
|
|
void calcInstrIds(const MachineBasicBlock *BB,
|
|
InstrToIdMap &MutableInstrToId) const {
|
|
InstrIdTy Id = 0;
|
|
for (auto &MI : BB->instrs()) {
|
|
MutableInstrToId[&MI] = Id;
|
|
Id += sizeOf(MI);
|
|
}
|
|
}
|
|
|
|
/// Returns MI's instruction Id. It renumbers (part of) the BB if MI is not
|
|
/// found in the map.
|
|
InstrIdTy getInstrId(const MachineInstr *MI) const {
|
|
auto It = InstrToId.find(MI);
|
|
if (It != InstrToId.end())
|
|
return It->second;
|
|
|
|
// Renumber the MBB.
|
|
// TODO: Renumber from MI onwards.
|
|
auto &MutableInstrToId = const_cast<InstrToIdMap &>(InstrToId);
|
|
calcInstrIds(MI->getParent(), MutableInstrToId);
|
|
return InstrToId.find(MI)->second;
|
|
}
|
|
|
|
// Length of the segment from MI (inclusive) to the first instruction of the
|
|
// basic block.
|
|
InstrIdTy getHeadLen(const MachineInstr *MI) const {
|
|
const MachineBasicBlock *MBB = MI->getParent();
|
|
return getInstrId(MI) + getInstrId(&MBB->instr_front()) + 1;
|
|
}
|
|
|
|
// Length of the segment from MI (exclusive) to the last instruction of the
|
|
// basic block.
|
|
InstrIdTy getTailLen(const MachineInstr *MI) const {
|
|
const MachineBasicBlock *MBB = MI->getParent();
|
|
return getInstrId(&MBB->instr_back()) - getInstrId(MI);
|
|
}
|
|
|
|
// Length of the segment from 'From' to 'To' (exclusive). Both instructions
|
|
// must be in the same basic block.
|
|
InstrIdTy getDistance(const MachineInstr *From,
|
|
const MachineInstr *To) const {
|
|
assert(From->getParent() == To->getParent());
|
|
return getInstrId(To) - getInstrId(From);
|
|
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// RegUses - cache of uses by register
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
  // Lazily-populated cache of non-undef, non-debug use operands per register.
  DenseMap<Register, SmallVector<const MachineOperand *>> RegUseMap;

  /// Return (and cache on first query) all non-undef, non-debug uses of
  /// \p Reg. NOTE(review): the returned reference points into RegUseMap and
  /// could be invalidated if the map later grows — looks like callers consume
  /// it immediately; confirm before holding onto it across queries.
  const SmallVector<const MachineOperand *> &
  getRegisterUses(Register Reg) const {
    auto I = RegUseMap.find(Reg);
    if (I != RegUseMap.end())
      return I->second;

    // Logically const: populate the cache behind a const interface.
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    SmallVector<const MachineOperand *> &Uses = NonConstThis->RegUseMap[Reg];
    for (const MachineOperand &UseMO : MRI->use_nodbg_operands(Reg)) {
      if (!UseMO.isUndef())
        Uses.push_back(&UseMO);
    }
    return Uses;
  }

  /// True if \p Reg has at least one non-undef, non-debug use.
  bool hasAtLeastOneUse(Register Reg) const {
    return !getRegisterUses(Reg).empty();
  }
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Paths
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
  /// A (source, destination) basic-block pair used as the key for cached
  /// per-path information in 'Paths'.
  class Path {
    using StorageTy =
        std::pair<const MachineBasicBlock *, const MachineBasicBlock *>;
    StorageTy P;

  public:
    constexpr Path() : P(nullptr, nullptr) {}
    constexpr Path(const MachineBasicBlock *Src, const MachineBasicBlock *Dst)
        : P(Src, Dst) {}
    Path(const StorageTy &Pair) : P(Pair) {}

    // Implicit conversion to the underlying pair so Path can reuse the
    // pair's DenseMapInfo for hashing.
    constexpr operator const StorageTy &() const { return P; }
    using DenseMapInfo = llvm::DenseMapInfo<StorageTy>;

    const MachineBasicBlock *src() const { return P.first; }
    const MachineBasicBlock *dst() const { return P.second; }
  };
|
|
|
|
enum class EdgeKind { Back = -1, None = 0, Forward = 1 };
|
|
static constexpr StringRef toString(EdgeKind EK) {
|
|
if (EK == EdgeKind::Back)
|
|
return "back";
|
|
if (EK == EdgeKind::Forward)
|
|
return "fwd";
|
|
return "none";
|
|
}
|
|
|
|
  /// Cached facts about one (src, dst) block path. Several members are
  /// computed lazily; all mutation goes through the initializePathInfo*
  /// flavors below.
  struct PathInfo {
    EdgeKind EK;                // Edge classification when src->dst is a CFG edge.
    bool Reachable;             // Can dst be reached from src?
    int ForwardReachable;       // Tri-state: -1 unset, 0 no, 1 yes.
    unsigned RelativeLoopDepth; // Loop depth of src relative to dst.
    std::optional<NextUseDistance> ShortestDistance;
    std::optional<NextUseDistance> ShortestUnweightedDistance;
    InstrIdTy Size;             // Block size; only set for self-paths.

    PathInfo()
        : EK(EdgeKind::None), Reachable(false), ForwardReachable(-1),
          RelativeLoopDepth(0), Size(0) {}

    bool isBackedge() const { return EK == EdgeKind::Back; }

    // Accessors for the tri-state ForwardReachable member.
    bool isForwardReachableSet() const { return 0 <= ForwardReachable; }
    bool isForwardReachableUnset() const { return ForwardReachable < 0; }
    bool isForwardReachable() const { return ForwardReachable == 1; }
    bool isNotForwardReachable() const { return ForwardReachable == 0; }

    void print(raw_ostream &OS) const {
      OS << "{ek=" << toString(EK) << " reach=" << Reachable
         << " fwd-reach=" << ForwardReachable
         << " loop-depth=" << RelativeLoopDepth << " size=" << Size;
      if (ShortestDistance) {
        OS << " shortest-dist=";
        ShortestDistance->print(OS);
      }
      if (ShortestUnweightedDistance) {
        OS << " shortest-unweighted-dist=";
        ShortestUnweightedDistance->print(OS);
      }
      OS << "}";
    }

    LLVM_DUMP_METHOD void dump() const {
      print(dbgs());
      dbgs() << '\n';
    }
  };
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Path Storage - 'Paths' is lazily populated and some members are lazily
|
|
// computed. All mutations should go through one of the 'initializePathInfo*'
|
|
// flavors below.
|
|
//----------------------------------------------------------------------------
|
|
  DenseMap<Path, PathInfo, Path::DenseMapInfo> Paths;

  /// Return the PathInfo for From -> To if it already exists, otherwise
  /// nullptr. Performs no lazy initialization.
  const PathInfo *maybePathInfoFor(const MachineBasicBlock *From,
                                   const MachineBasicBlock *To) const {
    auto I = Paths.find({From, To});
    return I == Paths.end() ? nullptr : &I->second;
  }

  /// Return the PathInfo for From -> To, creating and initializing it on
  /// first access (including computing reachability).
  PathInfo &getOrInitPathInfo(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
    // Logically const: the path cache is mutated behind a const interface.
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;

    Path P(From, To);
    auto [I, Inserted] = MutablePaths.try_emplace(P);
    if (!Inserted)
      return I->second;

    // calcIsReachable may itself insert entries into 'Paths'.
    bool Reachable = calcIsReachable(P.src(), P.dst());

    // Iterator may have been invalidated by calcIsReachable, so get a fresh
    // reference to the slot.
    return NonConstThis->initializePathInfo(MutablePaths.at(P), P,
                                            EdgeKind::None, Reachable);
  }

  /// Read-style alias of getOrInitPathInfo.
  const PathInfo &pathInfoFor(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
    return getOrInitPathInfo(From, To);
  }
|
|
|
|
//----------------------------------------------------------------------------
|
|
// initializePathInfo* - various flavors of PathInfo initialization. They
|
|
// (should) always funnel to the first flavor below.
|
|
//----------------------------------------------------------------------------
|
|
  /// Primary initializer: fill in \p Slot for path \p P. All other flavors
  /// funnel here.
  PathInfo &initializePathInfo(PathInfo &Slot, Path P, EdgeKind EK,
                               bool Reachable) {
    Slot.EK = EK;
    Slot.Reachable = Reachable;
    // Forward reachability is known immediately only for real CFG edges;
    // otherwise leave it unset (-1) for lazy computation.
    Slot.ForwardReachable = EK == EdgeKind::None ? -1 : EK == EdgeKind::Forward;
    Slot.RelativeLoopDepth =
        Slot.Reachable ? calcRelativeLoopDepth(P.src(), P.dst()) : 0;
    // Only the self-path carries the block size (see getSize()).
    Slot.Size = P.src() == P.dst() ? calcSize(P.src()) : 0;
    if (EK != EdgeKind::None)
      Slot.ShortestUnweightedDistance = 0;
    return Slot;
  }

  /// Insert-or-overwrite flavor usable from const methods.
  PathInfo &initializePathInfo(Path P, EdgeKind EK, bool Reachable) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;
    return NonConstThis->initializePathInfo(MutablePaths[P], P, EK, Reachable);
  }

  /// Initialize only if no PathInfo exists yet for \p P. Returns the slot
  /// and whether it was newly inserted.
  std::pair<PathInfo *, bool> maybeInitializePathInfo(Path P, EdgeKind EK,
                                                      bool Reachable) const {
    auto *NonConstThis = const_cast<AMDGPUNextUseAnalysisImpl *>(this);
    auto &MutablePaths = NonConstThis->Paths;
    auto [I, Inserted] = MutablePaths.try_emplace(P);
    if (Inserted)
      NonConstThis->initializePathInfo(I->second, P, EK, Reachable);
    return {&I->second, Inserted};
  }

  /// Record the lazily-computed forward-reachability of From -> To.
  /// Asserts the value has not already been set.
  bool initializePathInfoForwardReachable(const MachineBasicBlock *From,
                                          const MachineBasicBlock *To,
                                          bool Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(Slot.isForwardReachableUnset());
    Slot.ForwardReachable = Value;
    return Value;
  }

  /// Record the lazily-computed shortest weighted distance of From -> To.
  /// Asserts the value has not already been set.
  NextUseDistance
  initializePathInfoShortestDistance(const MachineBasicBlock *From,
                                     const MachineBasicBlock *To,
                                     NextUseDistance Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(!Slot.ShortestDistance.has_value());
    Slot.ShortestDistance = Value;
    return Value;
  }

  /// Record the lazily-computed shortest unweighted distance of From -> To.
  /// Asserts the value has not already been set.
  NextUseDistance
  initializePathInfoShortestUnweightedDistance(const MachineBasicBlock *From,
                                               const MachineBasicBlock *To,
                                               NextUseDistance Value) const {
    PathInfo &Slot = getOrInitPathInfo(From, To);
    assert(!Slot.ShortestUnweightedDistance.has_value());
    Slot.ShortestUnweightedDistance = Value;
    return Value;
  }
|
|
|
|
//----------------------------------------------------------------------------
|
|
// initialize*Paths
|
|
//----------------------------------------------------------------------------
|
|
private:
|
|
  /// Record reachability facts discovered as a side effect of a full
  /// (backedge-following) reachability search (see calcIsReachable).
  void initializePaths(const SmallVector<Path> &ReachablePaths,
                       const SmallVector<Path> &UnreachablePaths) const {
    for (const Path &P : ReachablePaths)
      initializePathInfo(P, EdgeKind::None, true);
    for (const Path &P : UnreachablePaths)
      initializePathInfo(P, EdgeKind::None, false);
  }

  /// Record forward-reachability facts discovered as a side effect of a
  /// forward-only reachability search. Only the ForwardReachable member is
  /// written; other PathInfo members keep their existing values.
  void
  initializeForwardOnlyPaths(const SmallVector<Path> &ReachablePaths,
                             const SmallVector<Path> &UnreachablePaths) const {
    for (bool R : {true, false}) {
      const auto &ToInit = R ? ReachablePaths : UnreachablePaths;
      for (const Path &P : ToInit) {
        PathInfo &Slot = getOrInitPathInfo(P.src(), P.dst());
        assert(Slot.isForwardReachableUnset() || Slot.ForwardReachable == R);
        Slot.ForwardReachable = R;
      }
    }
  }
|
|
|
|
// Follow the control flow graph starting at the entry block until all blocks
|
|
// have been visited. Along the way, initialize the PathInfo for each edge
|
|
// traversed.
|
|
  // Follow the control flow graph starting at the entry block until all blocks
  // have been visited. Along the way, initialize the PathInfo for each edge
  // traversed. Edges are classified with an iterative DFS coloring scheme: an
  // edge to a block still on the DFS stack ('Visiting') is a back edge;
  // anything else is treated as forward.
  void initializeCfgPaths() {
    Paths.clear();

    enum VisitState { Undiscovered, Visiting, Finished };
    DenseMap<const MachineBasicBlock *, VisitState> State;

    SmallVector<const MachineBasicBlock *> Work{&MF->front()};
    State[&MF->front()] = Undiscovered;

    while (!Work.empty()) {
      const MachineBasicBlock *Src = Work.back();
      VisitState &SrcState = State[Src];

      // A block may already be 'Finished' if it is reachable from multiple
      // predecessors causing it to be pushed more than once while still
      // 'Undiscovered'.
      if (SrcState == Visiting || SrcState == Finished) {
        Work.pop_back();
        SrcState = Finished;
        continue;
      }

      SrcState = Visiting;
      for (const MachineBasicBlock *Dst : Src->successors()) {
        const VisitState DstState = State.lookup(Dst);

        EdgeKind EK;
        if (DstState == Undiscovered) {
          EK = EdgeKind::Forward;
          Work.push_back(Dst);
        } else if (DstState == Visiting) {
          EK = EdgeKind::Back;
        } else {
          // Already finished: a forward/cross edge.
          EK = EdgeKind::Forward;
        }

        Path P(Src, Dst);
        assert(!Paths.contains(P));
        initializePathInfo(P, EK, /*Reachable*/ true);
      }
    }

    LLVM_DEBUG(dumpPaths());
  }
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Loop helpers
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
static bool isStandAloneLoop(const MachineLoop *Loop) {
|
|
return Loop->getSubLoops().empty() && Loop->isOutermost();
|
|
}
|
|
|
|
static MachineLoop *findChildLoop(MachineLoop *const Parent,
|
|
MachineLoop *Descendant) {
|
|
for (MachineLoop *L = Descendant; L != Parent; L = L->getParentLoop()) {
|
|
if (L->getParentLoop() == Parent)
|
|
return L;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
// If loops 'A' and 'B' share a common parent loop, return that loop and the
|
|
// depth of 'A' relative to it. Otherwise return nullptr and the loop depth of
|
|
// 'A'.
|
|
static std::pair<MachineLoop *, unsigned>
|
|
findCommonParent(MachineLoop *A, const MachineLoop *B) {
|
|
unsigned Depth = 0;
|
|
for (; A != nullptr; A = A->getParentLoop(), ++Depth) {
|
|
if (A->contains(B))
|
|
break;
|
|
}
|
|
return {A, Depth};
|
|
}
|
|
|
|
static const MachineBasicBlock *
|
|
getOutermostPreheader(const MachineLoop *Loop) {
|
|
return Loop ? Loop->getOutermostLoop()->getLoopPreheader() : nullptr;
|
|
}
|
|
|
|
static MachineBasicBlock *findChildPreheader(MachineLoop *const Parent,
|
|
MachineLoop *Descendant) {
|
|
MachineLoop *ChildLoop = findChildLoop(Parent, Descendant);
|
|
return ChildLoop ? ChildLoop->getLoopPreheader() : nullptr;
|
|
}
|
|
|
|
static const MachineBasicBlock *
|
|
getIncomingBlockIfPhiUse(const MachineInstr *MI, const MachineOperand *MO) {
|
|
return MI->isPHI() ? MI->getOperand(MO->getOperandNo() + 1).getMBB()
|
|
: nullptr;
|
|
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Calculate features
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
  /// Number of instructions in \p BB that contribute to distances; PHIs are
  /// excluded when !Cfg.CountPhis (mirrors sizeOf()).
  InstrIdTy calcSize(const MachineBasicBlock *BB) const {
    InstrIdTy Size = BB->size();
    if (!Cfg.CountPhis)
      Size -= std::distance(BB->begin(), BB->getFirstNonPHI());
    return Size;
  }

  /// Size of \p From scaled by its loop depth relative to \p To.
  NextUseDistance calcWeightedSize(const MachineBasicBlock *From,
                                   const MachineBasicBlock *To) const {
    return NextUseDistance::fromSize(getSize(From),
                                     getRelativeLoopDepth(From, To));
  }

  // Return the loop depth of 'From' relative to 'To'.
  unsigned calcRelativeLoopDepth(const MachineBasicBlock *From,
                                 const MachineBasicBlock *To) const {
    MachineLoop *LoopFrom = MLI->getLoopFor(From);
    MachineLoop *LoopTo = MLI->getLoopFor(To);

    // 'From' is not in any loop: no loop boundaries are crossed.
    if (!LoopFrom)
      return 0;

    // 'To' is not in any loop: every loop enclosing 'From' is crossed.
    if (!LoopTo)
      return LoopFrom->getLoopDepth();

    if (LoopFrom->contains(LoopTo)) // covers LoopFrom == LoopTo
      return 0;

    // Exiting from an inner loop out to an enclosing loop.
    if (LoopTo->contains(LoopFrom))
      return LoopFrom->getLoopDepth() - LoopTo->getLoopDepth();

    // Loops are siblings of some sort.
    return findCommonParent(LoopFrom, LoopTo).second;
  }
|
|
|
|
// Attempt to find a path from 'From' to 'To' using a depth first search. If
|
|
// 'ForwardOnly' is true, do not follow backedges. As a performance
|
|
// improvement, this may initialize reachable intermediate paths or paths we
|
|
// determine are unreachable.
|
|
  // Attempt to find a path from 'From' to 'To' using a depth first search. If
  // 'ForwardOnly' is true, do not follow backedges. As a performance
  // improvement, this may initialize reachable intermediate paths or paths we
  // determine are unreachable.
  bool calcIsReachable(const MachineBasicBlock *From,
                       const MachineBasicBlock *To,
                       bool ForwardOnly = false) const {
    // A block reaches itself only when it sits inside a loop.
    if (From == To && !MLI->getLoopFor(From))
      return false;

    // A pre-computed inter-block distance already proves reachability (valid
    // only when backedges may be followed).
    if (!ForwardOnly && interBlockDistanceExists(From, To))
      return true;

    // Explicit DFS stack: 'VisitOp' expands a block; 'PopOp' backtracks it.
    enum { VisitOp, PopOp };
    using MBBOpPair = std::pair<const MachineBasicBlock *, int>;
    SmallVector<MBBOpPair> Work{{From, VisitOp}};
    DenseSet<const MachineBasicBlock *> Visited{From};

    // Blocks currently on the DFS path (they reach 'To' if the search
    // succeeds).
    SmallVector<Path> IntermediatePath;
    // Blocks proven unable to reach 'To'.
    SmallVector<Path> Unreachable;

    // Should be run at every function exit point.
    auto Finally = [&](bool Reachable) {
      // This is an optimization. For intermediate paths we found while
      // calculating reachability for 'From' --> 'To', remember their
      // reachability.
      if (!Reachable) {
        IntermediatePath.clear();
        for (const MachineBasicBlock *MBB : Visited) {
          if (MBB != From)
            Unreachable.emplace_back(MBB, To);
        }
      }

      if (ForwardOnly)
        initializeForwardOnlyPaths(IntermediatePath, Unreachable);
      else
        initializePaths(IntermediatePath, Unreachable);

      return Reachable;
    };

    while (!Work.empty()) {
      auto [Current, Op] = Work.pop_back_val();

      // Backtracking
      if (Op == PopOp) {
        IntermediatePath.pop_back();
        // In forward-only mode, a fully-explored block that never reached
        // 'To' is known to be forward-unreachable.
        if (ForwardOnly)
          Unreachable.emplace_back(Current, To);
        continue;
      }

      // Dead end: nothing to expand.
      if (Current->succ_empty())
        continue;

      if (Current != From) {
        IntermediatePath.emplace_back(Current, To);
        Work.emplace_back(Current, PopOp);
      }

      for (const MachineBasicBlock *Succ : Current->successors()) {
        if (ForwardOnly && isBackedge(Current, Succ))
          continue;

        if (Succ == To)
          return Finally(true);

        // Consult previously-cached reachability before descending.
        if (auto CachedReachable = isMaybeReachable(Succ, To, ForwardOnly)) {
          if (CachedReachable.value())
            return Finally(true);
          Visited.insert(Succ);
          continue;
        }

        if (Visited.insert(Succ).second)
          Work.emplace_back(Succ, VisitOp);
      }
    }

    return Finally(false);
  }
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Inter-block distance - the weighted and unweighted cost (i.e. "distance")
|
|
// to travel from one MachineBasicBlock to another.
|
|
//
|
|
// Values are pre-computed and stored in 'InterBlockDistances' using a
|
|
// backwards data-flow algorithm similar to the one described in 4.1 of a
|
|
// "Register Spilling and Live-Range Splitting for SSA-Form Programs" by
|
|
// Matthias Braun and Sebastian Hack, CC'09. This replaced a prior
|
|
// implementation based on Dijkstra's shortest path algorithm.
|
|
//----------------------------------------------------------------------------
|
|
private:
|
|
  /// Distance between two blocks in two flavors: 'Weighted' scales each
  /// intermediate block by loop depth (via NextUseDistance::fromSize), while
  /// 'Unweighted' is a plain instruction count.
  struct InterBlockDistance {
    NextUseDistance Weighted;
    NextUseDistance Unweighted;
    // Default-constructed state uses -1 as a sentinel distance.
    InterBlockDistance() : Weighted(-1), Unweighted(-1) {}
    InterBlockDistance(NextUseDistance W, NextUseDistance UW)
        : Weighted(W), Unweighted(UW) {}
    bool operator==(const InterBlockDistance &Other) const {
      return Weighted == Other.Weighted && Unweighted == Other.Unweighted;
    }
    bool operator!=(const InterBlockDistance &Other) const {
      return !(*this == Other);
    }

    void print(raw_ostream &OS) const {
      OS << "{W=";
      Weighted.print(OS);
      OS << " U=";
      Unweighted.print(OS);
      OS << "}";
    }

    LLVM_DUMP_METHOD void dump() const {
      print(dbgs());
      dbgs() << '\n';
    }
  };
  // Map: source block number -> (destination block number -> distance).
  using InterBlockDistanceMap =
      DenseMap<unsigned, DenseMap<unsigned, InterBlockDistance>>;
  InterBlockDistanceMap InterBlockDistances;
|
|
|
|
  /// Pre-compute shortest weighted/unweighted distances between all pairs of
  /// blocks using a backwards data-flow fixed point (see the section comment
  /// above). Iterates the CFG in post-order until no per-block distance map
  /// changes.
  void initializeInterBlockDistances() {
    InterBlockDistanceMap Distances;

    bool Changed;
    do {
      Changed = false;
      for (const MachineBasicBlock *MBB : post_order(MF)) {
        unsigned MBBNum = MBB->getNumber();

        // Save previous state for convergence check
        InterBlockDistanceMap::mapped_type Prev = std::move(Distances[MBBNum]);
        InterBlockDistanceMap::mapped_type Curr;
        Curr.reserve(Prev.size());

        // Direct successors are distance 0 by definition: no instructions are
        // executed between exiting MBB and entering Succ.
        for (const MachineBasicBlock *Succ : MBB->successors())
          Curr[Succ->getNumber()] = InterBlockDistance(0, 0);

        // Propagate further destinations through each successor.
        for (const MachineBasicBlock *Succ : MBB->successors()) {
          unsigned SuccNum = Succ->getNumber();
          const unsigned UnweightedSize{getSize(Succ)};

          for (const auto &[DestBlockNum, DestDist] : Distances[SuccNum]) {
            // MBB -> MBB is considered unreachable (getInterBlockDistance
            // asserts From != To).
            if (DestBlockNum == MBBNum)
              continue;

            const MachineBasicBlock *DestMBB =
                MF->getBlockNumbered(DestBlockNum);

            // Unweighted: plain instruction count through Succ.
            const NextUseDistance UnweightedDist{UnweightedSize +
                                                 DestDist.Unweighted};

            // Weighted: scale Succ's size by its loop depth relative to the
            // destination, so blocks inside deeper loops cost more to cross.
            unsigned SuccToDestLoopDepth = calcRelativeLoopDepth(Succ, DestMBB);

            const NextUseDistance WeightedDist =
                DestDist.Weighted +
                NextUseDistance::fromSize(UnweightedSize, SuccToDestLoopDepth);

            // Insert or update distances (take minimum)
            auto [I, First] =
                Curr.try_emplace(DestBlockNum, WeightedDist, UnweightedDist);
            if (!First) {
              InterBlockDistance &Slot = I->second;
              Slot.Weighted = min(Slot.Weighted, WeightedDist);
              Slot.Unweighted = min(Slot.Unweighted, UnweightedDist);
            }
          }
        }
        Changed |= (Prev != Curr);
        Distances[MBBNum] = std::move(Curr);
      }
    } while (Changed);

    InterBlockDistances = std::move(Distances);
    LLVM_DEBUG(dumpInterBlockDistances());
  }
|
|
|
|
// Look up the cached inter-block distance record for the edge From -> To.
// Returns nullptr when no record exists, i.e. To was never reached from From
// during the fixed-point propagation.
const InterBlockDistance *
getInterBlockDistanceMapValue(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
  auto FromIt = InterBlockDistances.find(From->getNumber());
  if (FromIt == InterBlockDistances.end())
    return nullptr;
  auto ToIt = FromIt->second.find(To->getNumber());
  if (ToIt == FromIt->second.end())
    return nullptr;
  return &ToIt->second;
}
|
|
|
|
bool interBlockDistanceExists(const MachineBasicBlock *From,
|
|
const MachineBasicBlock *To) const {
|
|
return getInterBlockDistanceMapValue(From, To);
|
|
}
|
|
|
|
// Return the weighted or unweighted distance from 'From' to 'To', or
// unreachable() when no propagated record exists (or forward-only mode is on
// and 'To' is not forward-reachable from 'From').
NextUseDistance getInterBlockDistance(const MachineBasicBlock *From,
                                      const MachineBasicBlock *To,
                                      bool Unweighted) const {

  // Self-queries are a caller bug; null blocks are tolerated and treated as
  // unreachable. NOTE(review): the assert fires before the null check, so a
  // From == To == nullptr call trips the assert rather than returning.
  assert(From != To && "The basic blocks should be different.");
  if (!From || !To)
    return NextUseDistance::unreachable();

  // In forward-only mode, paths that require a backedge do not count.
  if (Cfg.ForwardOnly && !isForwardReachable(From, To))
    return NextUseDistance::unreachable();

  const InterBlockDistance *BD = getInterBlockDistanceMapValue(From, To);
  if (!BD)
    return NextUseDistance::unreachable();

  return Unweighted ? BD->Unweighted : BD->Weighted;
}
|
|
|
|
// Loop-depth-weighted distance between two distinct blocks.
NextUseDistance
getWeightedInterBlockDistance(const MachineBasicBlock *From,
                              const MachineBasicBlock *To) const {
  return getInterBlockDistance(From, To, /*Unweighted=*/false);
}
|
|
|
|
// Plain instruction-count distance between two distinct blocks.
NextUseDistance
getUnweightedInterBlockDistance(const MachineBasicBlock *From,
                                const MachineBasicBlock *To) const {
  return getInterBlockDistance(From, To, /*Unweighted=*/true);
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Feature getters. Use cached results if available. If not calculate.
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
// Instruction count of 'BB', taken from the cached self-path record
// (pathInfoFor(BB, BB)).
InstrIdTy getSize(const MachineBasicBlock *BB) const {
  return pathInfoFor(BB, BB).Size;
}
|
|
|
|
bool isReachable(const MachineBasicBlock *From,
|
|
const MachineBasicBlock *To) const {
|
|
return pathInfoFor(From, To).Reachable;
|
|
}
|
|
|
|
bool isReachableOrSame(const MachineBasicBlock *From,
|
|
const MachineBasicBlock *To) const {
|
|
return From == To || pathInfoFor(From, To).Reachable;
|
|
}
|
|
|
|
// True iff 'To' is reachable from 'From' without taking a backedge.
// The result is computed lazily: if the cached PathInfo already carries the
// forward-reachability bit, use it; otherwise compute it (a block is only
// forward-reachable if it is reachable at all) and store it in the cache.
bool isForwardReachable(const MachineBasicBlock *From,
                        const MachineBasicBlock *To) const {
  const PathInfo &PI = pathInfoFor(From, To);
  if (PI.isForwardReachableSet())
    return PI.isForwardReachable();

  // Cache miss: compute, record in the PathInfo, and return the new value.
  return initializePathInfoForwardReachable(
      From, To,
      PI.Reachable && calcIsReachable(From, To, /*ForwardOnly*/ true));
}
|
|
|
|
// Return true/false if we know that 'To' is reachable or not from
|
|
// 'From'. Otherwise return 'std::nullopt'.
|
|
std::optional<bool> isMaybeReachable(const MachineBasicBlock *From,
|
|
const MachineBasicBlock *To,
|
|
bool ForwardOnly) const {
|
|
const PathInfo *PI = maybePathInfoFor(From, To);
|
|
if (!PI)
|
|
return std::nullopt;
|
|
|
|
if (ForwardOnly) {
|
|
if (PI->isForwardReachable())
|
|
return true;
|
|
|
|
if (PI->isNotForwardReachable())
|
|
return false;
|
|
return std::nullopt;
|
|
}
|
|
return PI->Reachable;
|
|
}
|
|
|
|
bool isBackedge(const MachineBasicBlock *From,
|
|
const MachineBasicBlock *To) const {
|
|
return pathInfoFor(From, To).isBackedge();
|
|
}
|
|
|
|
// Can be used as a substitute for DT->dominates(A, B) if A and B are in the
|
|
// same basic block.
|
|
// Return true iff 'A' is at or before 'B' within their (shared) basic block.
// Instruction ids give the primary ordering; PHIs need special handling
// because they are assumed here not to carry comparable ids — presumably PHIs
// are excluded from id numbering (TODO confirm against getInstrId).
bool instrsAreInOrder(const MachineInstr *A, const MachineInstr *B) const {
  assert(A->getParent() == B->getParent() &&
         "instructions must be in the same basic block!");
  // Identical instructions, or ids in order: trivially ordered.
  if (A == B || getInstrId(A) < getInstrId(B))
    return true;
  // A non-PHI whose id is not before B's cannot precede B.
  if (!A->isPHI())
    return false;
  // A is a PHI; PHIs precede every non-PHI in the block.
  if (!B->isPHI())
    return true;
  // Both are PHIs: walk the block's PHI sequence and see which comes first.
  for (auto &PHI : A->getParent()->phis()) {
    if (&PHI == A)
      return true;
    if (&PHI == B)
      return false;
  }
  return false;
}
|
|
|
|
unsigned getRelativeLoopDepth(const MachineBasicBlock *From,
|
|
const MachineBasicBlock *To) const {
|
|
return pathInfoFor(From, To).RelativeLoopDepth;
|
|
}
|
|
|
|
// Shortest weighted distance from 'From' to 'To'. Served from the PathInfo
// cache when available; otherwise derived from the inter-block distance table
// and stored back into the cache.
NextUseDistance getShortestPath(const MachineBasicBlock *From,
                                const MachineBasicBlock *To) const {
  std::optional<NextUseDistance> MaybeD =
      pathInfoFor(From, To).ShortestDistance;
  if (MaybeD.has_value())
    return MaybeD.value();

  // Cache miss: compute and memoize.
  NextUseDistance Dist = getWeightedInterBlockDistance(From, To);
  return initializePathInfoShortestDistance(From, To, Dist);
}
|
|
|
|
// Shortest unweighted (raw instruction count) distance from 'From' to 'To'.
// Served from the PathInfo cache when available; otherwise derived from the
// inter-block distance table and stored back into the cache.
NextUseDistance getShortestUnweightedPath(const MachineBasicBlock *From,
                                          const MachineBasicBlock *To) const {
  std::optional<NextUseDistance> MaybeD =
      pathInfoFor(From, To).ShortestUnweightedDistance;
  if (MaybeD.has_value())
    return MaybeD.value();

  // Cache miss: compute and memoize.
  return initializePathInfoShortestUnweightedDistance(
      From, To, getUnweightedInterBlockDistance(From, To));
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
/// MBBDistPair - Represents the distance to a machine basic block.
|
|
/// Used for returning both the distance and the target block together.
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
struct MBBDistPair {
|
|
NextUseDistance Distance;
|
|
const MachineBasicBlock *MBB;
|
|
MBBDistPair() : Distance(NextUseDistance::unreachable()), MBB(nullptr) {}
|
|
MBBDistPair(NextUseDistance D, const MachineBasicBlock *B)
|
|
: Distance(D), MBB(B) {}
|
|
|
|
MBBDistPair operator+(NextUseDistance D) { return {Distance + D, MBB}; }
|
|
MBBDistPair &operator+=(NextUseDistance D) {
|
|
Distance += D;
|
|
return *this;
|
|
}
|
|
|
|
void print(raw_ostream &OS) const {
|
|
OS << "{";
|
|
Distance.print(OS);
|
|
if (MBB)
|
|
OS << " " << printMBBReference(*MBB);
|
|
else
|
|
OS << " <null>";
|
|
OS << "}";
|
|
}
|
|
|
|
LLVM_DUMP_METHOD void dump() const {
|
|
print(dbgs());
|
|
dbgs() << '\n';
|
|
}
|
|
};
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// CFG Helpers
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
// Return the shortest distance to a latch
|
|
// Return the (weighted) shortest distance from 'CurMBB' to any latch of
// 'CurLoop', together with the chosen latch. If CurMBB is itself a latch the
// distance is 0. When no latch is reachable the default (unreachable, null)
// pair is returned.
MBBDistPair calcShortestDistanceToLatch(const MachineBasicBlock *CurMBB,
                                        const MachineLoop *CurLoop) const {
  SmallVector<MachineBasicBlock *, 2> Latches;
  CurLoop->getLoopLatches(Latches);
  MBBDistPair LD; // starts at unreachable

  for (MachineBasicBlock *LMBB : Latches) {
    if (LMBB == CurMBB)
      return {0, CurMBB};

    // Keep the nearest latch seen so far.
    NextUseDistance Dst = getShortestPath(CurMBB, LMBB);
    if (Dst < LD.Distance) {
      LD.Distance = Dst;
      LD.MBB = LMBB;
    }
  }
  return LD;
}
|
|
|
|
// Return the shortest unweighted distance to a latch
|
|
// Unweighted twin of calcShortestDistanceToLatch: nearest latch of 'CurLoop'
// from 'CurMBB' measured in raw instruction counts.
MBBDistPair
calcShortestUnweightedDistanceToLatch(const MachineBasicBlock *CurMBB,
                                      const MachineLoop *CurLoop) const {
  SmallVector<MachineBasicBlock *, 2> Latches;
  CurLoop->getLoopLatches(Latches);
  MBBDistPair LD; // starts at unreachable

  for (MachineBasicBlock *LMBB : Latches) {
    if (LMBB == CurMBB)
      return {0, CurMBB};

    // Keep the nearest latch seen so far.
    NextUseDistance Dst = getShortestUnweightedPath(CurMBB, LMBB);
    if (Dst < LD.Distance) {
      LD.Distance = Dst;
      LD.MBB = LMBB;
    }
  }
  return LD;
}
|
|
|
|
// Return the shortest distance to an exit
|
|
// Return the (weighted) shortest distance from 'CurMBB' to any exiting block
// of 'CurLoop', together with the chosen exiting block. If CurMBB is itself
// exiting, the distance is 0. Note the returned block is the in-loop source
// of the exit edge, not the out-of-loop destination.
MBBDistPair calcShortestDistanceToExit(const MachineBasicBlock *CurMBB,
                                       const MachineLoop *CurLoop) const {
  SmallVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ExitEdges;
  CurLoop->getExitEdges(ExitEdges);
  MBBDistPair LD; // starts at unreachable

  for (auto [Exit, Dest] : ExitEdges) {
    if (Exit == CurMBB)
      return {0, CurMBB};

    // Keep the nearest exiting block seen so far.
    NextUseDistance Dst = getShortestPath(CurMBB, Exit);
    if (Dst < LD.Distance) {
      LD.Distance = Dst;
      LD.MBB = Exit;
    }
  }
  return LD;
}
|
|
|
|
// Return the shortest distance through a loop (header to latch) that goes
|
|
// through CurMBB.
|
|
// Shortest distance of a full trip through 'CurLoop' (header to exit) that
// passes through 'CurMBB', plus the exit block taken. Sizes of the header,
// CurMBB, and the exit block are each added exactly once.
MBBDistPair
calcShortestDistanceThroughInnermostLoop(const MachineBasicBlock *CurMBB,
                                         MachineLoop *CurLoop) const {
  assert(MLI->getLoopFor(CurMBB) == CurLoop);

  // This is a hot spot. Check it before doing anything else.
  // A single-block loop's trip is just the block itself.
  if (CurLoop->getNumBlocks() == 1)
    return {getSize(CurMBB), CurMBB};

  MachineBasicBlock *LoopHeader = CurLoop->getHeader();
  MBBDistPair LD{0, nullptr};

  // Header is always executed.
  LD += getSize(LoopHeader);

  // Path from header down to CurMBB (nothing to add if CurMBB is the header).
  if (CurMBB != LoopHeader)
    LD += getShortestPath(LoopHeader, CurMBB);

  // From CurMBB to an exiting block; if CurMBB itself exits, the trip ends
  // there.
  if (CurLoop->isLoopExiting(CurMBB))
    LD.MBB = CurMBB;
  else
    LD = calcShortestDistanceToExit(CurMBB, CurLoop) + LD.Distance;

  // Count CurMBB's own size unless it was already counted as the header or
  // the exit block.
  if (CurMBB != LoopHeader && CurMBB != LD.MBB)
    LD += getSize(CurMBB);

  // Count the exit block's size unless it is the header (already counted).
  if (LD.MBB != LoopHeader)
    LD += getSize(LD.MBB);

  return LD;
}
|
|
|
|
// Return the shortest distance through a loop (header to latch) that goes
|
|
// through CurMBB.
|
|
// Shortest distance through the loop nest from the innermost loop containing
// 'CurMBB' outward up to (and including) 'OuterLoop', always routing through
// CurMBB. Each inner trip is weighted once per level via applyLoopWeight().
// NOTE(review): assumes OuterLoop is an ancestor of CurMBB's loop; otherwise
// getParentLoop() eventually returns null and this dereferences it — confirm
// callers guarantee containment.
MBBDistPair calcShortestDistanceThroughLoop(const MachineBasicBlock *CurMBB,
                                            MachineLoop *OuterLoop) const {
  MachineLoop *CurLoop = MLI->getLoopFor(CurMBB);
  // Start with the trip through the innermost loop.
  MBBDistPair CurLD =
      calcShortestDistanceThroughInnermostLoop(CurMBB, CurLoop);

  MachineBasicBlock *CurHdr = CurLoop->getHeader();
  for (;;) {
    if (OuterLoop == CurLoop)
      return CurLD;

    MachineLoop *ParentLoop = CurLoop->getParentLoop();
    MachineBasicBlock *ParentHdr = ParentLoop->getHeader();

    // Parent trip: parent header, path down to the inner header, the inner
    // trip (weighted up one level), then out to a parent exit.
    MBBDistPair LD{0, nullptr};
    LD += getSize(ParentHdr);
    LD += getShortestPath(ParentHdr, CurHdr);
    LD += CurLD.Distance.applyLoopWeight();
    LD = calcShortestDistanceToExit(CurLD.MBB, ParentLoop) + LD.Distance;
    LD += getSize(LD.MBB);
    CurLD = LD;
    CurLoop = ParentLoop;
    CurHdr = ParentHdr;
  }
  llvm_unreachable("CurMBB not contained in OuterLoop");
}
|
|
|
|
// Similar to calcShortestDistanceThroughLoop with LoopWeight applied to the
|
|
// returned distance.
|
|
// Same as calcShortestDistanceThroughLoop, with one extra loop weight applied
// to the resulting distance.
MBBDistPair
calcWeightedDistanceThroughLoopViaMBB(const MachineBasicBlock *CurMBB,
                                      MachineLoop *CurLoop) const {
  MBBDistPair Result = calcShortestDistanceThroughLoop(CurMBB, CurLoop);
  Result.Distance = Result.Distance.applyLoopWeight();
  return Result;
}
|
|
|
|
// Return the weighted, shortest distance through a loop (header to latch).
|
|
// If ParentLoop is provided, use it to adjust the loop depth.
|
|
// Weighted shortest distance through a loop (header to latch) via 'CurMBB'.
// Multi-block loops delegate to the via-MBB path; a single-block loop is just
// its own size weighted by its depth (relative to 'ParentLoop' if given).
MBBDistPair calcWeightedDistanceThroughLoop(
    const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
    const MachineLoop *ParentLoop = nullptr) const {
  if (CurLoop->getNumBlocks() != 1)
    return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);

  // Single-block loop: weight its size by the depth delta to ParentLoop (or
  // absolute depth when no parent context is supplied).
  unsigned LoopDepth = MLI->getLoopDepth(CurMBB);
  if (ParentLoop)
    LoopDepth -= ParentLoop->getLoopDepth();

  return {NextUseDistance::fromSize(getSize(CurMBB), LoopDepth),
          CurLoop->getLoopLatch()};
}
|
|
|
|
// Calculate total distance from exit point to use instruction
|
|
// Total distance to a use: distance to the exit point, then the inter-block
// path from the exit block to the use's block, then the instructions in the
// use's block that precede the use.
NextUseDistance appendDistanceToUse(const MBBDistPair &Exit,
                                    const MachineInstr *UseMI,
                                    const MachineBasicBlock *UseMBB) const {
  NextUseDistance ExitToUseBB = getShortestPath(Exit.MBB, UseMBB);
  NextUseDistance HeadLen = getHeadLen(UseMI);
  return Exit.Distance + ExitToUseBB + HeadLen;
}
|
|
|
|
// Return the weighted, shortest distance through the CurLoop which is a
|
|
// sub-loop of UseLoop.
|
|
// Weighted shortest distance through 'CurLoop' (a sub-loop of 'UseLoop'),
// measured through the child of UseLoop that contains CurLoop.
MBBDistPair calcDistanceThroughSubLoopUse(const MachineBasicBlock *CurMBB,
                                          MachineLoop *CurLoop,
                                          MachineLoop *UseLoop) const {
  // All the sub-loops of the UseLoop will be executed before the use.
  // Hence, we should take this into consideration in distance calculation.
  MachineLoop *UseLoopSubLoop = findChildLoop(UseLoop, CurLoop);
  assert(UseLoopSubLoop && "CurLoop should be nested in UseLoop");
  return calcWeightedDistanceThroughLoop(CurMBB, UseLoopSubLoop, UseLoop);
}
|
|
|
|
// Similar to calcDistanceThroughSubLoopUse, adding the distance to 'UseMI'.
|
|
// Same as calcDistanceThroughSubLoopUse, then extended with the distance from
// the loop's exit point to 'UseMI' itself.
NextUseDistance calcDistanceThroughSubLoopToUseMI(
    const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
    const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
    MachineLoop *UseLoop) const {
  MBBDistPair ThroughSubLoop =
      calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop);
  return appendDistanceToUse(ThroughSubLoop, UseMI, UseMBB);
}
|
|
|
|
// Return the weighted distance through a loop to an outside use loop.
|
|
// Differentiates between uses inside or outside of the current loop nest.
|
|
// Weighted distance through 'CurLoop' toward a use outside it. Chooses how
// much of the loop nest must be traversed before the use can be reached,
// depending on how CurLoop and UseLoop relate.
MBBDistPair calcDistanceThroughLoopToOutsideLoopUse(
    const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
    const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
  assert(!CurLoop->contains(UseLoop));

  // A loop with no enclosing nest: just run it to completion.
  if (isStandAloneLoop(CurLoop))
    return calcWeightedDistanceThroughLoopViaMBB(CurMBB, CurLoop);

  MachineLoop *OutermostLoop = CurLoop->getOutermostLoop();
  if (!OutermostLoop->contains(UseLoop)) {
    // We should take into consideration the whole loop nest in the
    // calculation of the distance because we will reach the use after
    // executing the whole loop nest.

    // ... But make sure that we pick a route that goes through CurMBB
    return calcWeightedDistanceThroughLoopViaMBB(CurMBB, OutermostLoop);
  }

  // At this point we know that CurLoop and UseLoop are independent and they
  // are in the same loop nest.

  // If the use is at the same depth or deeper, finishing CurLoop suffices.
  if (MLI->getLoopDepth(CurMBB) <= MLI->getLoopDepth(UseMBB))
    return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);

  // The use is shallower: walk up the nest until we reach the use's depth
  // (or the outermost level), and traverse through that ancestor.
  assert(CurLoop != OutermostLoop && "The loop cannot be the outermost.");
  const unsigned UseLoopDepth = MLI->getLoopDepth(UseMBB);
  for (;;) {
    if (CurLoop->getLoopDepth() == UseLoopDepth)
      break;
    CurLoop = CurLoop->getParentLoop();
    if (CurLoop == OutermostLoop)
      break;
  }
  return calcWeightedDistanceThroughLoop(CurMBB, CurLoop);
}
|
|
|
|
// Similar to calcDistanceThroughLoopToOutsideLoopUse but adds the distance to
|
|
// an instruction in the loop.
|
|
// calcDistanceThroughLoopToOutsideLoopUse extended with the remaining
// distance from the loop's exit point to 'UseMI'.
NextUseDistance calcDistanceThroughLoopToOutsideLoopUseMI(
    const MachineBasicBlock *CurMBB, MachineLoop *CurLoop,
    const MachineInstr *UseMI, const MachineBasicBlock *UseMBB,
    MachineLoop *UseLoop) const {
  MBBDistPair ThroughLoop = calcDistanceThroughLoopToOutsideLoopUse(
      CurMBB, CurLoop, UseMBB, UseLoop);
  return appendDistanceToUse(ThroughLoop, UseMI, UseMBB);
}
|
|
|
|
// Return true if 'MO' is covered by 'LaneMask'
|
|
bool machineOperandCoveredBy(const MachineOperand &MO,
|
|
LaneBitmask LaneMask) const {
|
|
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
|
|
return (Mask & LaneMask) == Mask;
|
|
}
|
|
|
|
// Returns true iff uses of LiveReg/LiveLaneMask in PHI UseMI are coming from
|
|
// a backedge when starting at CurMI.
|
|
// Returns true iff uses of LiveReg/LiveLaneMask in PHI 'UseMI' are coming
// from a backedge when starting at 'CurMI' — i.e. the PHI sits in a loop
// header enclosing CurMI and the matching incoming value arrives via a latch.
bool isIncomingValFromBackedge(Register LiveReg, LaneBitmask LiveLaneMask,
                               const MachineInstr *CurMI,
                               const MachineInstr *UseMI) const {
  if (!UseMI->isPHI())
    return false;

  MachineLoop *CurLoop = MLI->getLoopFor(CurMI->getParent());
  MachineLoop *UseLoop = MLI->getLoopFor(UseMI->getParent());

  // Not a backedge if ...
  // A: not in a loop at all
  // B: or CurMI is in a loop outside of UseLoop
  // C: or UseMI is not in the UseLoop header
  if (/*A:*/ !UseLoop ||
      /*B:*/ (CurLoop && !UseLoop->contains(CurLoop)) ||
      /*C:*/ UseMI->getParent() != UseLoop->getHeader())
    return false;

  SmallVector<MachineBasicBlock *, 2> Latches;
  UseLoop->getLoopLatches(Latches);

  // PHI operands come in (value, block) pairs; scan pairs for an incoming
  // value of LiveReg (covered by LiveLaneMask) whose source block is a latch.
  const unsigned NumOps = UseMI->getNumOperands();
  for (unsigned I = 1; I < NumOps; I += 2) {
    const MachineOperand &RegMO = UseMI->getOperand(I - 1);
    const MachineOperand &MBBMO = UseMI->getOperand(I);
    assert(RegMO.isReg() && "Expected register operand of PHI");
    assert(MBBMO.isMBB() && "Expected MBB operand of PHI");
    if (RegMO.getReg() == LiveReg &&
        machineOperandCoveredBy(RegMO, LiveLaneMask)) {
      MachineBasicBlock *IncomingBB = MBBMO.getMBB();
      if (llvm::is_contained(Latches, IncomingBB))
        return true;
    }
  }
  return false;
}
|
|
|
|
// Return the distance from 'CurMI' through a parent loop backedge PHI Use
|
|
// ('UseMI').
|
|
// Return the distance from 'CurMI' through a parent loop backedge PHI use
// ('UseMI'): exit the inner loop nest, walk to the enclosing loop's latch,
// then reach the PHI at the head of the next iteration. The result is marked
// instruction-invariant (independent of CurMI's position inside its block).
CacheableNextUseDistance calcDistanceViaEnclosingBackedge(
    const MachineInstr *CurMI, const MachineBasicBlock *CurMBB,
    MachineLoop *CurLoop, const MachineInstr *UseMI,
    const MachineBasicBlock *UseMBB, MachineLoop *UseLoop) const {
  assert(UseLoop && "There is no backedge.");
  assert(CurLoop && (UseLoop != CurLoop) && UseLoop->contains(CurLoop) &&
         "Unexpected loop configuration");

  InstrIdTy UseHeadLen = getHeadLen(UseMI);
  // Full trip through the inner loop nest, exiting into UseLoop ...
  MBBDistPair InnerLoopLD =
      calcDistanceThroughSubLoopUse(CurMBB, CurLoop, UseLoop);
  // ... then from that exit point to UseLoop's nearest latch.
  MBBDistPair LD = calcShortestDistanceToLatch(InnerLoopLD.MBB, UseLoop);
  return {InstrInvariant,
          InnerLoopLD.Distance + LD.Distance + getSize(LD.MBB) + UseHeadLen};
}
|
|
|
|
// Optimized version of calcBackedgeDistance when we already know that CurMI
|
|
// and UseMI are in the same basic block
|
|
// Distance from 'CurMI' to 'UseMI' via a backedge of 'CurLoop' when both are
// in the same block ('CurMBB') and the use precedes CurMI in program order —
// the use happens on the NEXT iteration. Sums: rest of the current block,
// path to a latch plus the latch itself, the header (unless CurMBB is the
// header), the path back down to CurMBB, and the use's head length. All
// segments are unweighted — we stay inside the same loop iteration count.
NextUseDistance calcBackedgeDistance(const MachineInstr *CurMI,
                                     const MachineBasicBlock *CurMBB,
                                     MachineLoop *CurLoop,
                                     const MachineInstr *UseMI) const {
  // use is in the next loop iteration
  InstrIdTy CurTailLen = getTailLen(CurMI);
  InstrIdTy UseHeadLen = getHeadLen(UseMI);
  MBBDistPair LD = calcShortestUnweightedDistanceToLatch(CurMBB, CurLoop);
  const MachineBasicBlock *HdrMBB = CurLoop->getHeader();
  // Header size and header->CurMBB path are zero when CurMBB is the header.
  NextUseDistance Hdr = CurMBB == HdrMBB ? 0 : getSize(HdrMBB);
  NextUseDistance Dst =
      CurMBB == HdrMBB ? 0 : getShortestUnweightedPath(HdrMBB, CurMBB);

  return CurTailLen + LD.Distance + getSize(LD.MBB) + Hdr + Dst + UseHeadLen;
}
|
|
|
|
//----------------------------------------------------------------------------
|
|
// Calculate inter-instruction distances
|
|
//----------------------------------------------------------------------------
|
|
private:
|
|
// Calculate the shortest weighted path from MachineInstruction 'FromMI' to
|
|
// 'ToMI'. It is weighted distance in that paths that exit loops are made to
|
|
// look much further away.
|
|
// Calculate the shortest weighted path from MachineInstruction 'FromMI' to
// 'ToMI'. Same-block queries use the direct instruction distance; otherwise
// the distance is tail-of-FromMBB + inter-block path + head-of-ToMBB.
NextUseDistance calcShortestDistance(const MachineInstr *FromMI,
                                     const MachineInstr *ToMI) const {
  const MachineBasicBlock *FromMBB = FromMI->getParent();
  const MachineBasicBlock *ToMBB = ToMI->getParent();

  if (FromMBB == ToMBB) {
    NextUseDistance RV = getDistance(FromMI, ToMI);
    assert(RV >= 0 && "unexpected negative distance from getDistance");
    return RV;
  }

  InstrIdTy FromTailLen = getTailLen(FromMI);
  InstrIdTy ToHeadLen = getHeadLen(ToMI);
  NextUseDistance Dst = getShortestPath(FromMBB, ToMBB);
  assert(Dst.isReachable() &&
         "calcShortestDistance called for instructions in non-reachable"
         " basic blocks!");
  NextUseDistance RV = FromTailLen + Dst + ToHeadLen;
  assert(RV >= 0 && "unexpected negative distance");
  return RV;
}
|
|
|
|
// Calculate the shortest unweighted path from MachineInstruction 'FromMI' to
|
|
// 'ToMI'. In contrast with 'calcShortestDistance', distances are based solely
|
|
// on basic block instruction counts and traversing a loop exit does not
|
|
// affect the value.
|
|
// Calculate the shortest unweighted path from MachineInstruction 'FromMI' to
// 'ToMI'. In contrast with 'calcShortestDistance', distances are based solely
// on basic block instruction counts and traversing a loop exit does not
// affect the value.
NextUseDistance
calcShortestUnweightedDistance(const MachineInstr *FromMI,
                               const MachineInstr *ToMI) const {
  const MachineBasicBlock *FromMBB = FromMI->getParent();
  const MachineBasicBlock *ToMBB = ToMI->getParent();

  // Same block: direct instruction distance.
  if (FromMBB == ToMBB)
    return getDistance(FromMI, ToMI);

  // Cross block: tail of FromMBB + inter-block path + head of ToMBB.
  InstrIdTy FromTailLen = getTailLen(FromMI);
  InstrIdTy ToHeadLen = getHeadLen(ToMI);
  NextUseDistance Dst = getShortestUnweightedPath(FromMBB, ToMBB);
  assert(Dst.isReachable() &&
         "calcShortestUnweightedDistance called for instructions in"
         " non-reachable basic blocks!");
  return FromTailLen + Dst + ToHeadLen;
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// calcDistanceToUse* - various flavors of calculating the distance from an
|
|
// instruction 'CurMI' to the use of a live [sub]register.
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
// Return the distance from 'CurMI' to a live [sub]register use ('UseMI').
|
|
//
|
|
// Cfg flags controlling behavior:
|
|
// PreciseUseModeling — rewrite PHI uses to their incoming edge block;
|
|
// also selects unweighted cross-block distance
|
|
// PromoteToPreheader — route loop-entry / inner-loop uses to the preheader
|
|
// Return the distance from 'CurMI' to a live [sub]register use ('UseMO' in
// 'UseMI'), tagged with whether the result depends on CurMI's exact position
// (InstrRelative) or only on its block/loop (InstrInvariant, cacheable across
// instructions).
//
// Cfg flags controlling behavior:
//  PreciseUseModeling — rewrite PHI uses to their incoming edge block;
//                       also selects unweighted cross-block distance
//  PromoteToPreheader — route loop-entry / inner-loop uses to the preheader
CacheableNextUseDistance
calcDistanceToUse(Register LiveReg, LaneBitmask LiveLaneMask,
                  const MachineInstr &CurMI,
                  const MachineOperand *UseMO) const {
  const MachineInstr *UseMI = UseMO->getParent();
  const MachineBasicBlock *CurMBB = CurMI.getParent();
  const MachineBasicBlock *UseMBB = UseMI->getParent();
  MachineLoop *CurLoop = MLI->getLoopFor(CurMBB);
  MachineLoop *UseLoop = MLI->getLoopFor(UseMBB);

  if (Cfg.PreciseUseModeling) {
    // Map PHI use to the end of its incoming edge block.
    if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(UseMI, UseMO)) {
      UseMI = &PhiUseEdge->back();
      UseMBB = PhiUseEdge;
      UseLoop = MLI->getLoopFor(PhiUseEdge);
    }
  }

  // Classify the loop relationship between CurMBB and UseMBB; also compute
  // the preheader to promote the use to (when PromoteToPreheader is set) and
  // the common parent loop for sibling loops.
  enum class LoopConfig {
    NoCur,
    Same,
    CurContainsUse,
    UseContainsCur,
    Siblings,
    Unrelated
  };
  auto [LpCfg, PreHdr, CommonParent] = [&]()
      -> std::tuple<LoopConfig, const MachineBasicBlock *, MachineLoop *> {
    if (!CurLoop) {
      return {LoopConfig::NoCur, getOutermostPreheader(UseLoop), nullptr};
    }
    if (CurLoop->contains(UseLoop)) {
      return {CurMBB == UseMBB ? LoopConfig::Same
                               : LoopConfig::CurContainsUse,
              findChildPreheader(CurLoop, UseLoop), nullptr};
    }

    if (MachineLoop *P = findCommonParent(UseLoop, CurLoop).first) {
      if (P != UseLoop)
        return {LoopConfig::Siblings, findChildPreheader(P, UseLoop), P};
      return {LoopConfig::UseContainsCur, nullptr, nullptr};
    }
    return {LoopConfig::Unrelated, getOutermostPreheader(UseLoop), nullptr};
  }();

  //--------------------------------------------------------------------------
  // Don't PromoteToPreheader
  //--------------------------------------------------------------------------
  if (!Cfg.PromoteToPreheader) {
    switch (LpCfg) {
    case LoopConfig::NoCur:
    case LoopConfig::Same:
    case LoopConfig::CurContainsUse:
      // Plain weighted distance; depends on CurMI's position.
      return {InstrRelative, calcShortestDistance(&CurMI, UseMI)};

    case LoopConfig::UseContainsCur: {
      // A PHI fed from a latch means the value travels around the enclosing
      // loop's backedge.
      if (isIncomingValFromBackedge(LiveReg, LiveLaneMask, &CurMI, UseMI)) {
        return calcDistanceViaEnclosingBackedge(&CurMI, CurMBB, CurLoop,
                                                UseMI, UseMBB, UseLoop);
      }

      return {InstrInvariant, calcDistanceThroughSubLoopToUseMI(
                                  CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
    }
    case LoopConfig::Siblings:
    case LoopConfig::Unrelated:
      return {InstrInvariant, calcDistanceThroughLoopToOutsideLoopUseMI(
                                  CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
    }
    llvm_unreachable("unexpected loop configuration!");
  }

  //--------------------------------------------------------------------------
  // PromoteToPreheader
  //--------------------------------------------------------------------------
  // Redirect the use to the chosen preheader's terminator, so the distance is
  // measured to the loop entry rather than the in-loop use.
  if (PreHdr) {
    UseMI = &PreHdr->back();
    UseMBB = PreHdr;
    UseLoop = CommonParent;
  }

  switch (LpCfg) {
  case LoopConfig::NoCur:
    // NOTE(review): the "- (sizeOf(*UseMI) ? 0 : 1)" adjustment presumably
    // compensates for zero-size use instructions — confirm against sizeOf.
    return {InstrRelative, calcShortestUnweightedDistance(&CurMI, UseMI) -
                               (sizeOf(*UseMI) ? 0 : 1)};

  case LoopConfig::Same:
  case LoopConfig::CurContainsUse:
    // Use earlier in the same block: it is only reached via a backedge.
    if (CurMBB == UseMBB && !instrsAreInOrder(&CurMI, UseMI))
      return {InstrRelative,
              calcBackedgeDistance(&CurMI, CurMBB, CurLoop, UseMI)};

    return {InstrRelative, calcShortestUnweightedDistance(&CurMI, UseMI)};

  case LoopConfig::UseContainsCur:
  case LoopConfig::Siblings:
    return {InstrInvariant, calcDistanceThroughSubLoopToUseMI(
                                CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};

  case LoopConfig::Unrelated:
    return {InstrInvariant, calcDistanceThroughLoopToOutsideLoopUseMI(
                                CurMBB, CurLoop, UseMI, UseMBB, UseLoop)};
  }
  llvm_unreachable("unexpected loop configuration!");
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// getUses helpers (compute mode)
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
// Returns true if Use is reachable from MI. Handles backedges and intervening
|
|
// defs.
|
|
// Returns true if 'UseMO' (in 'UseMI'/'UseMBB') is reachable from 'MI' in
// 'MBB'. Handles backedges and intervening defs: a use is not counted if the
// value is redefined before the use can be reached.
bool isUseReachablePrecise(const MachineInstr &MI,
                           const MachineBasicBlock *MBB,
                           const MachineOperand *UseMO,
                           const MachineInstr *UseMI,
                           const MachineBasicBlock *UseMBB) const {

  // Filter out uses that are clearly unreachable
  if (MBB != UseMBB && !isReachable(MBB, UseMBB))
    return false;

  // PHI uses are considered part of the incoming BB. Check for reachability
  // at the edge.
  if (auto *PhiUseEdge = getIncomingBlockIfPhiUse(UseMI, UseMO)) {
    if (!isReachableOrSame(MBB, PhiUseEdge))
      return false;
  }

  // Filter out uses with an intermediate def.
  const MachineInstr *DefMI = MRI->getUniqueVRegDef(UseMO->getReg());
  const MachineBasicBlock *DefMBB = DefMI->getParent();
  if (MBB == UseMBB) {
    // A PHI use with its def in the same block: reached via a backedge.
    if (UseMI->isPHI() && MBB == DefMBB)
      return true;

    // Use strictly after MI in the same block: directly reachable.
    if (instrsAreInOrder(&MI, UseMI))
      return true;

    // A Def in the loop means that the value at MI will not survive through
    // to this use.
    MachineLoop *UseLoop = MLI->getLoopFor(UseMBB);
    return UseLoop && !UseLoop->contains(DefMBB);
  }

  // Cross-block: if MI shares a block with the def, the use is only live-out
  // when the def precedes MI.
  if (MBB == DefMBB)
    return instrsAreInOrder(DefMI, &MI);

  MachineLoop *Loop = MLI->getLoopFor(MBB);
  if (!Loop)
    return true;

  // Inside a loop nest: the use survives unless the def sits on every path
  // around the nest back to the use.
  MachineLoop *TopLoop = Loop->getOutermostLoop();
  return !TopLoop->contains(DefMBB) || !isReachable(MBB, DefMBB) ||
         !isForwardReachable(UseMBB, MBB);
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Debug/Developer Helpers
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
/// Goes over all MBB pairs in \p MF, calculates the shortest path between
|
|
/// them.
|
|
void populatePathTable() {
|
|
for (const MachineBasicBlock &MBB1 : *MF) {
|
|
for (const MachineBasicBlock &MBB2 : *MF) {
|
|
if (&MBB1 == &MBB2)
|
|
continue;
|
|
getShortestPath(&MBB1, &MBB2);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Print every cached (src, dst) PathInfo record to 'OS'.
void printPaths(raw_ostream &OS) const {
  OS << "\n---------------- Paths --------------- {\n";
  for (const auto &[P, PI] : Paths) {
    OS << " " << printMBBReference(*P.src()) << " -> "
       << printMBBReference(*P.dst()) << ": ";
    PI.print(OS);
    OS << '\n';
  }
  OS << "}\n";
}
|
|
|
|
// Debugger convenience: printPaths to the LLVM debug stream.
LLVM_DUMP_METHOD void dumpPaths() const { printPaths(dbgs()); }
|
|
|
|
// Legacy alias kept for existing call sites.
|
|
// Dump the cached weighted shortest distances for every path record.
// Entries with no computed distance print as -1 (via value_or).
void dumpShortestPaths() const {
  for (const auto &P : Paths) {
    const MachineBasicBlock *From = P.first.src();
    const MachineBasicBlock *To = P.first.dst();
    std::optional<NextUseDistance> Dist = P.second.ShortestDistance;
    dbgs() << "From: " << printMBBReference(*From)
           << "-> To:" << printMBBReference(*To) << " = "
           << Dist.value_or(-1).fmt() << "\n";
  }
}
|
|
|
|
// Print all inter-block distances, sorted by weighted distance and then by
// (from, to) block numbers for deterministic output.
void printInterBlockDistances(raw_ostream &OS) const {
  using MBBPair = std::pair<unsigned, unsigned>;
  using Elem = std::pair<NextUseDistance, MBBPair>;
  std::vector<Elem> SortedDistances;

  // Flatten the two-level map into (weighted distance, (from, to)) tuples.
  for (const auto &[FromNum, Dsts] : InterBlockDistances) {
    for (const auto &[ToNum, Dist] : Dsts) {
      SortedDistances.emplace_back(Dist.Weighted, MBBPair(FromNum, ToNum));
    }
  }
  // Order: distance, then source block number, then destination.
  llvm::sort(SortedDistances, [](const auto &A, const auto &B) {
    if (A.first != B.first)
      return A.first < B.first;

    if (A.second.first != B.second.first)
      return A.second.first < B.second.first;

    return A.second.second < B.second.second;
  });

  OS << "\n--------- InterBlockDistances -------- {\n";
  for (const Elem &E : SortedDistances) {

    OS << " bb." << E.second.first << " -> bb." << E.second.second << ": ";
    E.first.print(OS);
    OS << '\n';
  }
  OS << "}\n";
}
|
|
|
|
// Debugger convenience: printInterBlockDistances to the LLVM debug stream.
LLVM_DUMP_METHOD void dumpInterBlockDistances() const {
  printInterBlockDistances(dbgs());
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// LiveRegUse Caching - A cache of the distances for the last
|
|
// MachineInstruction. When getting the distances for a MachineInstruction, if
|
|
// it is the same basic block as the cached instruction, we can generally use
|
|
// an offset from the cached values to compute the distances. There are some
|
|
// exceptions - see 'cacheLiveRegUse'.
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
// A cached next-use record plus a flag saying whether the cached distance is
// relative to the instruction it was computed at (MIDependent) — only those
// entries need validity checks / offset adjustment on reuse.
struct LiveRegToUseMapElem {
  LiveRegUse Use;
  bool MIDependent;
  LiveRegToUseMapElem() : Use(), MIDependent(false) {}
  LiveRegToUseMapElem(LiveRegUse U, bool MIDep)
      : Use(U), MIDependent(MIDep) {}

  void print(raw_ostream &OS) const {
    Use.print(OS);
    OS << (MIDependent ? " [mi-dep]" : " [mi-indep]");
  }

  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }
};
|
|
|
|
// Using std::map because LaneBitmask does not work out-of-the-box as a
// DenseMap key and I did not see a performance benefit over std::map.
using LaneBitmaskToUseMap = std::map<LaneBitmask, LiveRegToUseMapElem>;
using LiveRegToUseMap = DenseMap<Register, LaneBitmaskToUseMap>;

// Instruction the current cache contents were computed at (null = no cache).
const MachineInstr *CachedDistancesMI = nullptr;
// Valid cache for CachedDistancesMI.
LiveRegToUseMap CachedDistances;
// Entries accumulated for the next instruction, promoted by
// updateCachedLiveRegUses.
LiveRegToUseMap PendingCachedDistances;
// Hit/miss statistics, reported by printDistanceCache.
unsigned DistanceCacheHits = 0;
unsigned DistanceCacheMisses = 0;
|
|
|
|
// Drop all cached distances and zero the hit/miss statistics.
void resetDistanceCache() {
  CachedDistancesMI = nullptr;
  CachedDistances.clear();
  DistanceCacheHits = 0;
  DistanceCacheMisses = 0;
}
|
|
|
|
void maybeClearCachedLiveRegUses(const MachineInstr &MI) {
|
|
if (CachedDistancesMI &&
|
|
(CachedDistancesMI->getParent() != MI.getParent() ||
|
|
!instrsAreInOrder(CachedDistancesMI, &MI))) {
|
|
CachedDistancesMI = nullptr;
|
|
CachedDistances.clear();
|
|
}
|
|
}
|
|
|
|
// Decide whether a cached entry may be reused at 'MI', where 'LastDelta' is
// the distance advanced since the cache point. MI-independent entries are
// always reusable; MI-dependent entries need validity checks.
bool okToUseCacheElem(const LiveRegToUseMapElem &CacheElem,
                      const MachineInstr &MI, const InstrIdTy LastDelta) {
  if (!CacheElem.MIDependent)
    return true;

  const LiveRegUse &U = CacheElem.Use;

  // Never okay to produce a negative distance
  if (U.Dist < LastDelta)
    return false;

  const MachineInstr *UseMI = U.Use->getParent();

  // Always okay if use is in another basic block or UseMI is MI
  if (UseMI->getParent() != MI.getParent() || UseMI == &MI)
    return true;

  // If CachedDistancesMI <= Use < MI we could have a problem since we don't
  // know if Use is still reachable.
  return !instrsAreInOrder(CachedDistancesMI, UseMI) ||
         !instrsAreInOrder(UseMI, &MI);
}
|
|
|
|
// Look up a cached use record for (Reg, LaneMask) valid at 'MI'. Returns the
// register's lane map and the matching entry, or {nullptr, nullptr} on any
// miss. Updates the hit/miss counters as a side effect.
std::pair<const LaneBitmaskToUseMap *, const LiveRegToUseMapElem *>
findCachedLiveRegUse(Register Reg, LaneBitmask LaneMask,
                     const MachineInstr &MI, const InstrIdTy LastDelta) {
  if (!DistanceCacheEnabled)
    return {nullptr, nullptr};

  ++DistanceCacheMisses; // Assume miss
  auto I = CachedDistances.find(Reg);
  if (I == CachedDistances.end())
    return {nullptr, nullptr};
  const LaneBitmaskToUseMap &RegSlot = I->second;
  if (RegSlot.empty())
    return {nullptr, nullptr};

  auto J = RegSlot.find(LaneMask);
  if (J == RegSlot.end())
    return {nullptr, nullptr};

  // Found an entry, but only use it if still valid at MI.
  const LiveRegToUseMapElem &MaskSlot = J->second;
  if (!okToUseCacheElem(MaskSlot, MI, LastDelta))
    return {nullptr, nullptr};

  // Cache hit: undo the assumed miss.
  --DistanceCacheMisses;
  ++DistanceCacheHits;
  return {&RegSlot, &MaskSlot};
}
|
|
|
|
// Record a use for (Reg, Mask) in the pending cache (promoted to the active
// cache by updateCachedLiveRegUses). An existing entry for the same mask is
// kept (try_emplace does not overwrite).
// NOTE(review): the 'MI' parameter is not referenced in this body —
// presumably kept for interface symmetry; confirm before removing.
void cacheLiveRegUse(const MachineInstr &MI, Register Reg, LaneBitmask Mask,
                     LiveRegUse U, bool MIDependent) {
  if (!DistanceCacheEnabled)
    return;

  auto I = PendingCachedDistances.try_emplace(Reg).first;
  LaneBitmaskToUseMap &RegSlot = I->second;
  RegSlot.try_emplace(Mask, U, MIDependent);
}
|
|
|
|
// Promote the pending cache built while answering the query at 'MI' to be
// the active cache, and remember 'MI' as the instruction the cached
// distances are relative to.
void updateCachedLiveRegUses(const MachineInstr &MI) {
  if (!DistanceCacheEnabled)
    return;

  CachedDistancesMI = &MI;
  CachedDistances = std::move(PendingCachedDistances);
  // The moved-from map is valid but unspecified; clear() restores a
  // known-empty state for the next instruction's pending entries.
  PendingCachedDistances.clear();
  LLVM_DEBUG(dumpDistanceCache());
}
|
|
|
|
// Dump the active distance cache to 'OS' for debugging: the instruction the
// cache is anchored at, one line per cached (register, lane-mask) entry, and
// the running hit/miss statistics.
void printDistanceCache(raw_ostream &OS) const {
  OS << "\n----------- Distance Cache ----------- {\n";
  OS << " CachedAt: ";
  if (CachedDistancesMI)
    OS << *CachedDistancesMI;
  else
    OS << "<none>\n";

  constexpr size_t RegNameWidth = 20;
  for (const auto &[Reg, ByMask] : CachedDistances) {
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    LaneBitmask AllLanes = MRI->getMaxLaneMaskForVReg(Reg);

    for (const auto &[Mask, Elem] : ByMask) {
      // Build a human-readable name: plain register for a full-lane entry,
      // a sub-register name when the mask maps to exactly one index, and a
      // raw "mask=" fallback otherwise.
      std::string RegName;
      raw_string_ostream KOS(RegName);
      if (Mask == AllLanes) {
        KOS << printReg(Reg);
      } else {
        SmallVector<unsigned> Indexes;
        TRI->getCoveringSubRegIndexes(RC, Mask, Indexes);
        if (Indexes.size() == 1)
          KOS << printReg(Reg, TRI, Indexes.front(), MRI);
        else
          KOS << printReg(Reg) << " mask=" << Mask.getAsInteger();
      }
      OS << " " << left_justify(RegName, RegNameWidth) << " : ";
      Elem.print(OS);
      OS << '\n';
    }
  }
  OS << " (hits=" << DistanceCacheHits << " misses=" << DistanceCacheMisses
     << ")\n";
  OS << "}\n";
}
|
|
|
|
// Convenience wrapper: printDistanceCache to the LLVM debug stream.
LLVM_DUMP_METHOD void dumpDistanceCache() const {
  printDistanceCache(dbgs());
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Processing Live Reg Uses
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
|
|
// Decompose each use in 'Uses' by sub-reg and store the nearest one in
// 'UseByMask'. Ignores subregs matching 'LiveRegLaneMask' - these are handled
// as registers, not sub-regs.
// Lazily-built cache of the covering sub-register indexes for each register
// class, so getCoveringSubRegIndexes is only computed once per class.
DenseMap<const TargetRegisterClass *, SmallVector<unsigned>>
    SubRegIndexesForRegClass;
void collectSubRegUsesByMask(
    const SmallVectorImpl<const MachineOperand *> &Uses,
    const SmallVectorImpl<CacheableNextUseDistance> &Distances,
    LaneBitmask LiveRegLaneMask, LaneBitmaskToUseMap &UseByMask) {

  // 'Distances' is parallel to 'Uses': one pre-computed distance per operand.
  assert(Uses.size());
  assert(Uses.size() == Distances.size());

  // All operands in 'Uses' refer to the same register, so one register class
  // (and one covering index set) applies to the whole batch.
  const TargetRegisterClass *RC = MRI->getRegClass(Uses.front()->getReg());
  auto [SRI, Inserted] = SubRegIndexesForRegClass.try_emplace(RC);
  if (Inserted)
    TRI->getCoveringSubRegIndexes(RC, LaneBitmask::getAll(), SRI->second);
  const SmallVector<unsigned> &RCSubRegIndexes = SRI->second;

  unsigned OneIndex; // Backing store for 'Indexes' below when 1 index
  for (size_t I = 0; I < Uses.size(); ++I) {
    const MachineOperand *MO = Uses[I];
    auto [SubRegMIDep, Dist] = Distances[I];
    const LiveRegUse LRU{MO, Dist};

    // An explicit sub-register use touches exactly that index; a full
    // register use touches every covering index of the class.
    ArrayRef<unsigned> Indexes;
    if (MO->getSubReg()) {
      OneIndex = MO->getSubReg();
      Indexes = ArrayRef(OneIndex);
    } else {
      Indexes = RCSubRegIndexes;
    }

    for (unsigned Idx : Indexes) {
      LaneBitmask Mask = TRI->getSubRegIndexLaneMask(Idx);
      // Whole-register lanes are handled by the register-level path.
      if (Mask.all() || Mask == LiveRegLaneMask)
        continue;

      // Keep only the closest use per lane mask, tracking whether that
      // closest distance depends on the query instruction.
      auto &[SlotU, SlotMIDep] = UseByMask[Mask];
      if (updateClosest(SlotU, LRU))
        SlotMIDep = SubRegMIDep;
    }
  }
}
|
|
|
|
// Similar to 'collectSubRegUsesByMask' above, but uses cached distances.
|
|
void collectSubRegUsesByMaskFromCache(const LaneBitmaskToUseMap &CachedMap,
|
|
LaneBitmask LiveRegLaneMask,
|
|
const MachineInstr *MI,
|
|
InstrIdTy LastDelta,
|
|
LaneBitmaskToUseMap &UseByMask) {
|
|
|
|
for (const auto &KV : CachedMap) {
|
|
LaneBitmask SubregLaneMask = KV.first;
|
|
if (SubregLaneMask.all() || SubregLaneMask == LiveRegLaneMask)
|
|
continue;
|
|
|
|
const LiveRegToUseMapElem &SubregE = KV.second;
|
|
if (!okToUseCacheElem(SubregE, *MI, LastDelta))
|
|
continue;
|
|
|
|
const bool MIDep = SubregE.MIDependent;
|
|
LiveRegUse U = SubregE.Use;
|
|
if (MIDep)
|
|
U.Dist -= LastDelta;
|
|
|
|
auto &[SlotU, SlotMIDep] = UseByMask[SubregLaneMask];
|
|
if (updateClosest(SlotU, U))
|
|
SlotMIDep = MIDep;
|
|
}
|
|
}
|
|
|
|
// Loops through 'UseByMask' finding the furthest sub-register and updating
|
|
// 'FurthestSubreg' accordingly.
|
|
void updateFurthestSubReg(
|
|
const MachineInstr &MI, const LiveRegUse &U,
|
|
const LaneBitmaskToUseMap &UseByMask,
|
|
DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses,
|
|
LiveRegUse &FurthestSubreg) {
|
|
|
|
if (UseByMask.empty()) {
|
|
updateFurthest(FurthestSubreg, U);
|
|
return;
|
|
}
|
|
|
|
for (const auto &KV : UseByMask) {
|
|
const LiveRegUse &SubregU = KV.second.Use;
|
|
const bool SubregMIDep = KV.second.MIDependent;
|
|
|
|
if (RelevantUses)
|
|
RelevantUses->try_emplace(SubregU.Use, SubregU);
|
|
cacheLiveRegUse(MI, SubregU.Use->getReg(), KV.first, SubregU,
|
|
SubregMIDep);
|
|
updateFurthest(FurthestSubreg, SubregU);
|
|
}
|
|
}
|
|
|
|
// Used to populate 'MIDefs' to be passed to 'getNextUseDistances'.
|
|
SmallSet<Register, 4> collectDefinedRegisters(const MachineInstr &MI) const {
|
|
SmallSet<Register, 4> MIDefs;
|
|
|
|
for (const MachineOperand &MO : MI.all_defs()) {
|
|
if (MO.isReg() && MO.getReg().isValid() && hasAtLeastOneUse(MO.getReg()))
|
|
MIDefs.insert(MO.getReg());
|
|
}
|
|
return MIDefs;
|
|
}
|
|
|
|
// Computes distances from 'MI' to each registers in 'LiveRegs'. Returns the
// furthest register and (optionally) sub-register in 'Furthest' and
// 'FurthestSubreg' respectively. When 'RelevantUses' is non-null, every
// register/sub-register use that contributed a distance is recorded in it.
public:
void getNextUseDistances(const GCNRPTracker::LiveRegSet &LiveRegs,
                         const MachineInstr &MI, LiveRegUse &Furthest,
                         LiveRegUse *FurthestSubreg = nullptr,
                         DenseMap<const MachineOperand *, UseDistancePair>
                             *RelevantUses = nullptr) {
  // Registers defined at 'MI' itself are not live-through candidates here.
  const SmallSet<Register, 4> MIDefs(collectDefinedRegisters(MI));

  // Scratch containers reused across the LiveRegs loop (cleared per reg).
  SmallVector<const MachineOperand *> Uses;
  SmallVector<CacheableNextUseDistance> Distances;
  LaneBitmaskToUseMap UseByMask;

  maybeClearCachedLiveRegUses(MI);
  // Distance from the instruction the cache was built at to 'MI'; used to
  // rebase MI-dependent cached distances onto the current query point.
  const InstrIdTy LastDelta =
      CachedDistancesMI ? getDistance(CachedDistancesMI, &MI) : 0;

  for (auto &KV : LiveRegs) {
    const Register Reg = KV.first;
    const LaneBitmask LaneMask = KV.second;

    if (MIDefs.contains(Reg))
      continue;

    Uses.clear();
    UseByMask.clear();

    LiveRegUse U;
    bool MIDependent = false;
    // Fast path: reuse a cached (possibly rebased) distance when valid.
    auto [CacheMap, CacheElem] =
        findCachedLiveRegUse(Reg, LaneMask, MI, LastDelta);
    if (CacheMap && CacheElem) {
      MIDependent = CacheElem->MIDependent;
      U = CacheElem->Use;
      if (MIDependent)
        U.Dist -= LastDelta;
    } else {
      // Slow path: enumerate reachable uses and compute the distances.
      getReachableUses(Reg, LaneMask, MI, Uses);
      if (Uses.empty())
        continue;

      const MachineOperand *NextUse = nullptr;
      NextUseDistance Dist = getShortestDistance(
          Reg, LaneMask, MI, Uses, &NextUse, &MIDependent, &Distances);
      U = LiveRegUse{NextUse, Dist};
    }

    if (RelevantUses)
      RelevantUses->try_emplace(U.Use, U);
    // Record into the pending cache for the next query.
    cacheLiveRegUse(MI, Reg, LaneMask, U, MIDependent);

    updateFurthest(Furthest, U);

    if (!FurthestSubreg)
      continue;

    // Sub-register distances: from the cache when the register hit the
    // cache (even if this specific lane mask did not), otherwise from the
    // per-use distances just computed.
    if (CacheMap) {
      collectSubRegUsesByMaskFromCache(*CacheMap, LaneMask, &MI, LastDelta,
                                       UseByMask);
    } else {
      collectSubRegUsesByMask(Uses, Distances, LaneMask, UseByMask);
    }
    updateFurthestSubReg(MI, U, UseByMask, RelevantUses, *FurthestSubreg);
  }
  // Promote everything cached during this query to the active cache.
  updateCachedLiveRegUses(MI);
}
|
|
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
// Helper methods for printAsJson
|
|
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
private:
// Format an unsigned id for output.
static format_object<unsigned> Fmt(unsigned Id) { return format("%u", Id); }

public:
// Emit the verbose per-instruction JSON fields: the internal instruction id
// and its head/tail lengths. NOTE(review): head-len/tail-len semantics come
// from getHeadLen/getTailLen, defined elsewhere in this file.
void printVerboseInstrFields(json::OStream &J, const MachineInstr &MI) const {
  J.attribute("id", getInstrId(&MI));
  J.attribute("head-len", getHeadLen(&MI));
  J.attribute("tail-len", getTailLen(&MI));
}
|
|
|
|
// Emit the inter-block path table as a JSON array: one object per (src, dst)
// pair with its shortest weighted/unweighted distances (JSON null when
// absent), edge kind, and reachability flags.
void printPaths(json::OStream &J, ModuleSlotTracker &MST) const {
  // Emit an optional distance as either its JSON value or JSON null.
  auto EmitDist = [&J](StringRef Key, const auto &OptDist) {
    if (OptDist.has_value())
      J.attribute(Key, OptDist.value().toJsonValue());
    else
      J.attribute(Key, nullptr);
  };

  J.attributeBegin("paths");
  J.arrayBegin();
  for (const auto &[P, PI] : Paths) {
    J.objectBegin();

    printMBBNameAttr(J, "src", *P.src(), MST);
    printMBBNameAttr(J, "dst", *P.dst(), MST);

    EmitDist("shortest-distance", PI.ShortestDistance);
    EmitDist("shortest-unweighted-distance", PI.ShortestUnweightedDistance);

    J.attribute("edge-kind", static_cast<int>(PI.EK));
    J.attribute("reachable", PI.Reachable);
    J.attribute("forward-reachable", PI.ForwardReachable);

    J.objectEnd();
  }
  J.arrayEnd();
  J.attributeEnd();
}
|
|
|
|
public:
AMDGPUNextUseAnalysisImpl(const MachineFunction *, const MachineLoopInfo *);
// Release all per-function distance/ordering tables.
~AMDGPUNextUseAnalysisImpl() { clearTables(); }

AMDGPUNextUseAnalysis::Config getConfig() const { return Cfg; }
// Changing the configuration invalidates all precomputed state, so the
// tables are rebuilt from scratch.
void setConfig(AMDGPUNextUseAnalysis::Config NewCfg) {
  Cfg = NewCfg;
  clearTables();
  initializeTables();
}

// Distance-cache statistics, exposed for the JSON printer.
unsigned getDistanceCacheHits() const { return DistanceCacheHits; }
unsigned getDistanceCacheMisses() const { return DistanceCacheMisses; }

// Collect the uses of 'LiveReg' (restricted to 'LaneMask') reachable from
// 'MI'. Defined out of line below.
void getReachableUses(Register LiveReg, LaneBitmask LaneMask,
                      const MachineInstr &MI,
                      SmallVector<const MachineOperand *> &Uses) const;

/// \Returns the shortest next-use distance for \p LiveReg.
NextUseDistance
getShortestDistance(Register LiveReg, LaneBitmask LaneMask,
                    const MachineInstr &FromMI,
                    const SmallVector<const MachineOperand *> &Uses,
                    const MachineOperand **ShortestUseOut, bool *MIDependent,
                    SmallVector<CacheableNextUseDistance> *Distances) const;

// Convenience overload: whole register, no optional outputs.
NextUseDistance
getShortestDistance(Register LiveReg, const MachineInstr &FromMI,
                    const SmallVector<const MachineOperand *> &Uses) const {
  return getShortestDistance(LiveReg, LaneBitmask::getAll(), FromMI, Uses,
                             nullptr, nullptr, nullptr);
}
};
|
|
|
|
// Construct the analysis implementation for 'MF': capture the target hooks,
// select the configuration preset (compatibility mode), apply any explicit
// per-knob command-line overrides, then build the distance tables.
AMDGPUNextUseAnalysisImpl::AMDGPUNextUseAnalysisImpl(
    const MachineFunction *MF, const MachineLoopInfo *ML) {

  this->MF = MF;
  this->MLI = ML;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF->getRegInfo();

  // FIXME: Hopefully we will soon converge on a single way of calculating
  // next-use distance and remove these presets.
  // "compute" matches PR #156079 behavior; anything else selects the
  // "graphics" preset matching PR #171520.
  if (ConfigPresetOpt == "compute")
    Cfg = AMDGPUNextUseAnalysis::Config::Compute();
  else
    Cfg = AMDGPUNextUseAnalysis::Config::Graphics();

  // Explicit command-line flags override the preset, but only when actually
  // passed (getNumOccurrences distinguishes "default" from "set").
  if (ConfigCountPhisOpt.getNumOccurrences())
    Cfg.CountPhis = ConfigCountPhisOpt;
  if (ConfigForwardOnlyOpt.getNumOccurrences())
    Cfg.ForwardOnly = ConfigForwardOnlyOpt;
  if (ConfigPreciseUseModelingOpt.getNumOccurrences())
    Cfg.PreciseUseModeling = ConfigPreciseUseModelingOpt;
  if (ConfigPromoteToPreheaderOpt.getNumOccurrences())
    Cfg.PromoteToPreheader = ConfigPromoteToPreheaderOpt;

  initializeTables();
}
|
|
|
|
// Compute the minimum distance from 'CurMI' to any use in 'Uses' of
// 'LiveReg' (restricted to 'LaneMask'). Optionally reports the winning use,
// whether its distance depends on 'CurMI', and the full per-use distance
// list (parallel to 'Uses') for sub-register processing and caching.
NextUseDistance AMDGPUNextUseAnalysisImpl::getShortestDistance(
    Register LiveReg, LaneBitmask LaneMask, const MachineInstr &CurMI,
    const SmallVector<const MachineOperand *> &Uses,
    const MachineOperand **ShortestUseOut, bool *CurMIDependentOut,
    SmallVector<CacheableNextUseDistance> *Distances) const {

  assert(!LiveReg.isPhysical() && !TRI->isAGPR(*MRI, LiveReg) &&
         "Next-use distance is calculated for SGPRs and VGPRs");
  const MachineOperand *NextUse = nullptr;
  // Start from "unreachable" so any real distance wins the first compare.
  auto NextUseDist = NextUseDistance::unreachable();
  bool CurMIDependent = false;

  if (Distances) {
    Distances->clear();
    Distances->reserve(Uses.size());
  }
  for (auto *UseMO : Uses) {
    auto [Dep, D] = calcDistanceToUse(LiveReg, LaneMask, CurMI, UseMO);

    if (D < NextUseDist) {
      NextUseDist = D;
      NextUse = UseMO;
      CurMIDependent = Dep;
    }

    // Record every distance, not just the minimum, preserving the 1:1
    // correspondence with 'Uses'.
    if (Distances)
      Distances->push_back({Dep, D});
  }
  if (ShortestUseOut)
    *ShortestUseOut = NextUse;
  if (CurMIDependentOut)
    *CurMIDependentOut = CurMIDependent;

  assert(NextUseDist.isReachable() &&
         "getShortestDistance called with no reachable uses");
  return NextUseDist;
}
|
|
|
|
// Append to 'Uses' every use of 'Reg' covered by 'LaneMask' that is
// reachable from 'MI'. Reachability is either precise (per-use path check)
// or approximate (program order in-block, forward block reachability
// otherwise), depending on Cfg.PreciseUseModeling.
void AMDGPUNextUseAnalysisImpl::getReachableUses(
    Register Reg, LaneBitmask LaneMask, const MachineInstr &MI,
    SmallVector<const MachineOperand *> &Uses) const {
  // A full-register mask (either form) means no lane filtering is needed.
  const bool CheckMask = LaneMask != LaneBitmask::getAll() &&
                         LaneMask != MRI->getMaxLaneMaskForVReg(Reg);
  const MachineBasicBlock *MBB = MI.getParent();

  for (const MachineOperand *UseMO : getRegisterUses(Reg)) {
    const MachineInstr *UseMI = UseMO->getParent();
    const MachineBasicBlock *UseMBB = UseMI->getParent();

    if (CheckMask && !machineOperandCoveredBy(*UseMO, LaneMask))
      continue;

    bool Reachable;
    if (Cfg.PreciseUseModeling)
      Reachable = isUseReachablePrecise(MI, MBB, UseMO, UseMI, UseMBB);
    else if (MBB == UseMBB)
      Reachable = instrsAreInOrder(&MI, UseMI);
    else
      Reachable = isForwardReachable(MBB, UseMBB);

    if (Reachable)
      Uses.push_back(UseMO);
  }
}
|
|
|
|
//==============================================================================
|
|
// AMDGPUNextUseAnalysis
|
|
//==============================================================================
|
|
// Construct the public analysis facade; all state lives in the pimpl.
AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(const MachineFunction *MF,
                                             const MachineLoopInfo *MLI)
    : Impl(std::make_unique<AMDGPUNextUseAnalysisImpl>(MF, MLI)) {}
|
|
// Move construction/assignment and destruction simply follow the owning
// unique_ptr, so default them. The definitions must stay out of line (here,
// not in the header) because AMDGPUNextUseAnalysisImpl is an incomplete type
// at the declaration site. Note unique_ptr's move-assignment is self-move
// safe, so no explicit self-check is needed.
AMDGPUNextUseAnalysis::AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other) =
    default;
AMDGPUNextUseAnalysis::~AMDGPUNextUseAnalysis() = default;

AMDGPUNextUseAnalysis &
AMDGPUNextUseAnalysis::operator=(AMDGPUNextUseAnalysis &&Other) = default;
|
|
|
|
// Forwarders to the implementation's configuration accessors. setConfig
// triggers a full table rebuild inside the impl.
AMDGPUNextUseAnalysis::Config AMDGPUNextUseAnalysis::getConfig() const {
  return Impl->getConfig();
}

void AMDGPUNextUseAnalysis::setConfig(Config Cfg) { Impl->setConfig(Cfg); }
|
|
|
|
/// \Returns the next-use distance for \p LiveReg.
|
|
NextUseDistance AMDGPUNextUseAnalysis::getShortestDistance(
|
|
Register LiveReg, const MachineInstr &FromMI,
|
|
const SmallVector<const MachineOperand *> &Uses,
|
|
const MachineOperand **ShortestUseOut,
|
|
SmallVector<NextUseDistance> *DistancesOut) const {
|
|
|
|
SmallVector<AMDGPUNextUseAnalysisImpl::CacheableNextUseDistance> Distances;
|
|
auto Dist = Impl->getShortestDistance(LiveReg, LaneBitmask::getAll(), FromMI,
|
|
Uses, ShortestUseOut, nullptr,
|
|
DistancesOut ? &Distances : nullptr);
|
|
if (DistancesOut) {
|
|
for (auto [MIDep, D] : Distances)
|
|
DistancesOut->push_back(D);
|
|
}
|
|
return Dist;
|
|
}
|
|
|
|
void AMDGPUNextUseAnalysis::getNextUseDistances(
|
|
const DenseMap<unsigned, LaneBitmask> &LiveRegs, const MachineInstr &MI,
|
|
UseDistancePair &FurthestOut, UseDistancePair *FurthestSubregOut,
|
|
DenseMap<const MachineOperand *, UseDistancePair> *RelevantUses) const {
|
|
|
|
LiveRegUse Furthest;
|
|
LiveRegUse FurthestSubreg;
|
|
Impl->getNextUseDistances(LiveRegs, MI, Furthest,
|
|
FurthestSubregOut ? &FurthestSubreg : nullptr,
|
|
RelevantUses);
|
|
FurthestOut = Furthest;
|
|
if (FurthestSubregOut)
|
|
*FurthestSubregOut = FurthestSubreg;
|
|
}
|
|
// Forward the reachable-use query straight to the implementation.
void AMDGPUNextUseAnalysis::getReachableUses(
    Register LiveReg, LaneBitmask LaneMask, const MachineInstr &MI,
    SmallVector<const MachineOperand *> &Uses) const {
  Impl->getReachableUses(LiveReg, LaneMask, MI, Uses);
}
|
|
|
|
//==============================================================================
|
|
// AMDGPUNextUseAnalysisLegacyPass
|
|
//==============================================================================
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Legacy Analysis Pass
|
|
//------------------------------------------------------------------------------
|
|
// Legacy pass-manager wrapper around the next-use analysis.
AMDGPUNextUseAnalysisLegacyPass::AMDGPUNextUseAnalysisLegacyPass()
    : MachineFunctionPass(ID) {}
StringRef AMDGPUNextUseAnalysisLegacyPass::getPassName() const {
  return "Next Use Analysis";
}
|
|
|
|
// Build a fresh next-use analysis for 'MF'. Returns false: this pass only
// computes analysis state and never modifies the function.
bool AMDGPUNextUseAnalysisLegacyPass::runOnMachineFunction(
    MachineFunction &MF) {
  const MachineLoopInfo *MLI =
      &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  // Prefer make_unique over reset(new ...) (consistent with the rest of the
  // file); any previous function's analysis is released first.
  NUA = std::make_unique<AMDGPUNextUseAnalysis>(&MF, MLI);
  return false;
}
|
|
|
|
// Declare dependencies: loop info is needed for loop-depth distance
// weighting; the pass itself preserves everything.
void AMDGPUNextUseAnalysisLegacyPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
char AMDGPUNextUseAnalysisLegacyPass::ID = 0;
// The address of ID serves as the opaque identifier used by the pass
// registry and by getAnalysis<> lookups.
char &llvm::AMDGPUNextUseAnalysisLegacyID = AMDGPUNextUseAnalysisLegacyPass::ID;

INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisLegacyPass, DEBUG_TYPE,
                      "Next Use Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUNextUseAnalysisLegacyPass, DEBUG_TYPE,
                    "Next Use Analysis", false, true)

// Factory used when building the target's pass pipeline.
FunctionPass *llvm::createAMDGPUNextUseAnalysisLegacyPass() {
  return new AMDGPUNextUseAnalysisLegacyPass();
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// New Pass Manager Analysis Pass
|
|
//------------------------------------------------------------------------------
|
|
AnalysisKey AMDGPUNextUseAnalysisPass::Key;

// New pass-manager entry point: build the analysis from the function's loop
// info and return it by value as the analysis result.
AMDGPUNextUseAnalysisPass::Result
AMDGPUNextUseAnalysisPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  const MachineLoopInfo &MLI = MFAM.getResult<MachineLoopAnalysis>(MF);
  return AMDGPUNextUseAnalysis(&MF, &MLI);
}
|
|
|
|
//==============================================================================
|
|
// AMDGPUNextUseAnalysisPrinterLegacyPass
|
|
//==============================================================================
|
|
namespace {
// Emit the "instr" attribute for 'MI', plus the verbose id/length fields
// when verbose dumping (DumpNextUseDistanceVerbose) is enabled.
void printInstrMember(json::OStream &J, ModuleSlotTracker &MST,
                      const MachineInstr &MI,
                      const AMDGPUNextUseAnalysisImpl &NUA) {
  printStringAttr(J, "instr", MI, MST);
  if (DumpNextUseDistanceVerbose)
    NUA.printVerboseInstrFields(J, MI);
}
|
|
|
|
// Emit (verbose mode only) a "distances" JSON object mapping each relevant
// use's register name to its next-use distance, sorted by (register,
// sub-register) for stable FileCheck output.
void printDistances(
    json::OStream &J, const MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
    ModuleSlotTracker &MST,
    const DenseMap<const MachineOperand *, UseDistancePair> &Uses) {
  if (!DumpNextUseDistanceVerbose)
    return;

  // Sorting isn't necessary for the purposes of JSON, but it reduces
  // FileCheck differences.
  SmallVector<const MachineOperand *> Keys;
  for (const MachineOperand *K : Uses.keys())
    Keys.push_back(K);
  llvm::sort(Keys, [](const auto &A, const auto &B) {
    return A->getReg() < B->getReg() ||
           (A->getReg() == B->getReg() && A->getSubReg() < B->getSubReg());
  });

  J.attributeBegin("distances");
  J.objectBegin();

  for (const MachineOperand *K : Keys) {
    const LiveRegUse U = Uses.at(K);
    printAttr(J, printReg(U.getReg(), &TRI, U.getSubReg(), &MRI),
              U.Dist.toJsonValue());
  }

  J.objectEnd();
  J.attributeEnd();
}
|
|
|
|
// Emit the "furthest" (or "furthest-subreg") JSON object for use 'F': the
// register name, the distance, and in verbose mode the use operand and its
// instruction. An invalid use yields an empty object. NOTE(review): 'MST' is
// only consumed on the verbose path.
void printFurthestUse(json::OStream &J, const MachineRegisterInfo &MRI,
                      const SIRegisterInfo &TRI, ModuleSlotTracker &MST,
                      const LiveRegUse F, bool Subreg = false) {
  J.attributeBegin(Subreg ? "furthest-subreg" : "furthest");
  J.objectBegin();

  if (F.Use) {
    printStringAttr(
        J, "register",
        printReg(F.getReg(), &TRI, Subreg ? F.getSubReg() : 0, &MRI));

    if (DumpNextUseDistanceVerbose) {
      printStringAttr(J, "use", [&](raw_ostream &OS) { OS << (*F.Use); });
      printStringAttr(J, "use-mi", *F.Use->getParent(), MST);
    }
    J.attribute("distance", F.Dist.toJsonValue());
  }

  J.objectEnd();
  J.attributeEnd();
}
|
|
|
|
// Emit a JSON object mapping every virtual register defined in 'MF' to the
// distance from its def to its closest reachable use (JSON form of
// "unreachable" when no use is reachable).
void printDistanceFromDefToUse(json::OStream &J, const MachineFunction &MF,
                               const AMDGPUNextUseAnalysis &NUA,
                               const SIRegisterInfo &TRI,
                               const MachineRegisterInfo &MRI) {
  auto getRegNextUseDistance = [&](Register DefReg) {
    // NOTE(review): dereferences the first def unconditionally; relies on
    // the caller only passing registers taken from MI.all_defs(), which
    // guarantees at least one def exists.
    const MachineInstr &DefMI = *MRI.def_instr_begin(DefReg);

    SmallVector<const MachineOperand *> Uses;
    NUA.getReachableUses(DefReg, LaneBitmask::getAll(), DefMI, Uses);
    if (Uses.empty())
      return NextUseDistance::unreachable();
    return NUA.getShortestDistance(DefReg, DefMI, Uses);
  };

  J.attributeBegin("distance-from-def-to-closest-use");
  J.objectBegin();

  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      for (const MachineOperand &MO : MI.all_defs()) {
        Register Reg = MO.getReg();
        // Physical registers are outside the scope of this analysis.
        if (Reg.isPhysical())
          continue;
        NextUseDistance D = getRegNextUseDistance(Reg);
        printAttr(J, printReg(Reg, &TRI, 0, &MRI), D.toJsonValue());
      }
    }
  }

  J.objectEnd();
  J.attributeEnd();
}
|
|
|
|
// Walk every instruction of 'MF', query the next-use analysis against the
// set of registers live at that point, and emit the results ("furthest",
// "furthest-subreg", and in verbose mode the full distance map) as JSON.
// Also appends the optional def-to-use, path, and cache-metric sections.
void printNextUseDistancesAsJson(json::OStream &J, const MachineFunction &MF,
                                 const AMDGPUNextUseAnalysis &NUA,
                                 const AMDGPUNextUseAnalysisImpl &NUAImpl,
                                 const LiveIntervals &LIS) {
  using UseDistancePair = AMDGPUNextUseAnalysis::UseDistancePair;
  const Function &F = MF.getFunction();
  const Module *M = F.getParent();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // We don't actually care about register pressure here - just using
  // GCNDownwardRPTracker as a convenient way of getting the set of live
  // registers at a given instruction.
  GCNDownwardRPTracker RPTracker(LIS);
  ModuleSlotTracker MST(M);
  MST.incorporateFunction(F);

  // Reused across instructions; cleared before each query.
  DenseMap<const MachineOperand *, UseDistancePair> RelevantUses;

  J.attributeBegin("furthest-distances");
  J.objectBegin();

  for (const MachineBasicBlock &MBB : MF) {
    std::string BBName;
    raw_string_ostream BBOS(BBName);
    MBB.printName(BBOS, MachineBasicBlock::PrintNameIr, &MST);

    J.attributeBegin(BBOS.str());
    J.arrayBegin();

    const MachineInstr *PrevMI = nullptr;
    for (const MachineInstr &MI : MBB) {
      // Update register pressure tracker.
      // Re-anchor at the block's first instruction and after PHIs; otherwise
      // advance() steps the tracker forward one instruction.
      if (!PrevMI || PrevMI->getOpcode() == AMDGPU::PHI)
        RPTracker.reset(MI);
      RPTracker.advance();

      UseDistancePair Furthest;
      UseDistancePair FurthestSubreg;
      RelevantUses.clear();
      NUA.getNextUseDistances(RPTracker.getLiveRegs(), MI, Furthest,
                              &FurthestSubreg, &RelevantUses);

      J.objectBegin();
      printInstrMember(J, MST, MI, NUAImpl);
      printDistances(J, MRI, TRI, MST, RelevantUses);
      printFurthestUse(J, MRI, TRI, MST, Furthest);
      printFurthestUse(J, MRI, TRI, MST, FurthestSubreg, /*Subreg*/ true);
      J.objectEnd();

      PrevMI = &MI;
    }

    J.arrayEnd();
    J.attributeEnd();
  }

  J.objectEnd();
  J.attributeEnd();

  // Optional sections gated by the dump flags.
  if (DumpNextUseDistanceVerbose || DumpNextUseDistanceDefToUse)
    printDistanceFromDefToUse(J, MF, NUA, TRI, MRI);

  if (DumpNextUseDistanceVerbose)
    NUAImpl.printPaths(J, MST);

  if (DistanceCacheEnabled) {
    J.attributeBegin("metrics");
    J.objectBegin();
    {
      J.attributeBegin("distance-cache");
      J.objectBegin();
      {
        J.attribute("hits", NUAImpl.getDistanceCacheHits());
        J.attribute("misses", NUAImpl.getDistanceCacheMisses());
      }
      J.objectEnd();
      J.attributeEnd(); // distance-cache
    }
    J.objectEnd();
    J.attributeEnd(); // metrics
  }
}
|
|
|
|
// Emit the analysis results as a single JSON document. Destination selection:
// when the JSON-output option was not passed, write to 'FallbackOS'; when it
// is empty or "-", write to stdout; otherwise write to the named file.
// 'JsonTimer' is stopped inside the dump so its value can be embedded in the
// emitted JSON itself.
void printAsJson(raw_ostream &FallbackOS, TimerGroup &JsonTimerGroup,
                 Timer &JsonTimer, const MachineFunction &MF,
                 const AMDGPUNextUseAnalysis &NUA,
                 const AMDGPUNextUseAnalysisImpl &NUAImpl,
                 const LiveIntervals &LIS) {
  std::string FN = DumpNextUseDistanceAsJson;

  auto dump = [&](raw_ostream &OS) {
    json::OStream J(OS, 2);
    J.objectBegin();

    J.attributeBegin("next-use-analysis");
    J.objectBegin();
    printNextUseDistancesAsJson(J, MF, NUA, NUAImpl, LIS);
    J.objectEnd();
    J.attributeEnd();

    JsonTimer.stopTimer();
    JsonTimerGroup.printJSONValues(OS, ",\n");

    J.objectEnd();
  };

  if (!DumpNextUseDistanceAsJson.getNumOccurrences()) {
    dump(FallbackOS);
  } else if (FN.empty() || FN == "-") {
    dump(outs());
  } else {
    std::error_code EC;
    ToolOutputFile OutF(FN, EC, sys::fs::OF_None);
    if (EC) {
      // Opening the requested file failed; report it and fall back rather
      // than silently writing into a broken stream.
      errs() << "warning: cannot open '" << FN
             << "' for writing: " << EC.message()
             << "; dumping next-use JSON to the fallback stream\n";
      dump(FallbackOS);
      return;
    }
    dump(OutF.os());
    OutF.keep();
  }
}
|
|
} // namespace
|
|
|
|
//------------------------------------------------------------------------------
|
|
// Legacy Printer Pass
|
|
//------------------------------------------------------------------------------
|
|
// Legacy pass-manager printer pass: consumes the analysis and dumps it.
AMDGPUNextUseAnalysisPrinterLegacyPass::AMDGPUNextUseAnalysisPrinterLegacyPass()
    : MachineFunctionPass(ID) {}

StringRef AMDGPUNextUseAnalysisPrinterLegacyPass::getPassName() const {
  return "AMDGPU Next Use Analysis Printer";
}
|
|
|
|
// Dump the next-use analysis for 'MF' as JSON. Returns false: printing only,
// the function is never modified.
bool AMDGPUNextUseAnalysisPrinterLegacyPass::runOnMachineFunction(
    MachineFunction &MF) {
  // Time only the JSON generation; the timer is stopped inside printAsJson
  // so the measured time can be embedded in the emitted document.
  TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
                            "AMDGPU Next Use Analysis JSON Printer", false);
  Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
  JsonTimer.startTimer();

  const LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
  const AMDGPUNextUseAnalysis &NUA =
      getAnalysis<AMDGPUNextUseAnalysisLegacyPass>().getNextUseAnalysis();

  // stderr is the fallback stream when no JSON output file was requested.
  printAsJson(errs(), JsonTimerGroup, JsonTimer, MF, NUA, *NUA.Impl, LIS);

  return false;
}
|
|
|
|
// Declare dependencies: loop info and live intervals (for the RP tracker)
// plus the analysis being printed; the printer preserves everything.
void AMDGPUNextUseAnalysisPrinterLegacyPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.addRequired<MachineLoopInfoWrapperPass>();
  AU.addRequired<LiveIntervalsWrapperPass>();
  AU.addRequired<AMDGPUNextUseAnalysisLegacyPass>();
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
char AMDGPUNextUseAnalysisPrinterLegacyPass::ID = 0;
// Address of ID is the opaque identifier used by the pass registry.
char &AMDGPUNextUseAnalysisPrinterLegacyID =
    AMDGPUNextUseAnalysisPrinterLegacyPass::ID;

INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisPrinterLegacyPass,
                      "amdgpu-next-use-printer",
                      "AMDGPU Next Use Analysis Printer", false, false)

INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)

INITIALIZE_PASS_END(AMDGPUNextUseAnalysisPrinterLegacyPass,
                    "amdgpu-next-use-printer",
                    "AMDGPU Next Use Analysis Printer", false, false)

// Factory used when building the target's pass pipeline.
FunctionPass *llvm::createAMDGPUNextUseAnalysisPrinterLegacyPass() {
  return new AMDGPUNextUseAnalysisPrinterLegacyPass();
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// New Pass Manager Printer Pass
|
|
//------------------------------------------------------------------------------
|
|
// New pass-manager printer: mirror of the legacy printer's
// runOnMachineFunction, pulling the analyses from the MFAM and writing to
// the stream ('OS') the pass was constructed with.
PreservedAnalyses
AMDGPUNextUseAnalysisPrinterPass::run(MachineFunction &MF,
                                      MachineFunctionAnalysisManager &MFAM) {

  // Time only the JSON generation; stopped inside printAsJson so the value
  // can be embedded in the emitted document.
  TimerGroup JsonTimerGroup("amdgpu-next-use-analysis-json",
                            "AMDGPU Next Use Analysis JSON Printer", false);
  Timer JsonTimer("json", "Total time spent generating json", JsonTimerGroup);
  JsonTimer.startTimer();

  const LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
  const AMDGPUNextUseAnalysis &NUA =
      MFAM.getResult<AMDGPUNextUseAnalysisPass>(MF);

  printAsJson(OS, JsonTimerGroup, JsonTimer, MF, NUA, *NUA.Impl, LIS);

  return PreservedAnalyses::all();
}
|