Files
llvm-project/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h
macurtis-amd b9ae01500d AMDGPU: Add NextUseAnalysis Pass (#178873)
Based on
- https://github.com/llvm/llvm-project/pull/156079 and
- https://github.com/llvm/llvm-project/pull/171520

See those PRs for background.

Provides a compatibility mode option
`--amdgpu-next-use-analysis-compatibility-mode` that produces results
that match either PR #156079 (`compute`) or PR #171520 (`graphics`).

Co-authored-by: alex-t <atimofee@amd.com>
Co-authored-by: Konstantina Mitropoulou <KonstantinaMitropoulou@amd.com>

---------

Co-authored-by: Konstantina Mitropoulou <KonstantinaMitropoulou@amd.com>
2026-04-16 12:05:59 -05:00

379 lines
13 KiB
C++

//===---------------------- AMDGPUNextUseAnalysis.h ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements Next Use Analysis.
//
// For each register it goes over all uses and returns the estimated distance of
// the nearest use. This will be used for selecting which registers to spill
// before register allocation.
//
// This is based on ideas from the paper:
// "Register Spilling and Live-Range Splitting for SSA-Form Programs"
// Matthias Braun and Sebastian Hack, CC'09
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/JSON.h"
#include <cinttypes>
#include <limits>
#include <optional>
namespace llvm {
class AMDGPUNextUseAnalysisImpl;
//==============================================================================
// NextUseDistance - Represents a distance in the next-use analysis. Currently
// wraps a 64-bit int with special encoding for loop depth and unreachable
// distances.
//==============================================================================
/// A next-use distance: a thin wrapper around a signed 64-bit value.
///
/// std::numeric_limits<int64_t>::max() is reserved as the "unreachable"
/// sentinel. Loop nesting is modelled by multiplying a distance by
/// 2^(7 * Depth) (see fromLoopDepth); a scaling whose result would not fit
/// saturates to unreachable().
///
/// Note that operator+=, operator-= and unary operator- are plain int64_t
/// arithmetic: they neither saturate nor treat unreachable() specially.
class NextUseDistance {
public:
  using RawValueType = int64_t;

  /// The sentinel distance (maximum int64_t value).
  constexpr static NextUseDistance unreachable() {
    return NextUseDistance(std::numeric_limits<int64_t>::max());
  }

  /// Builds the distance \p Size scaled by the loop weight for \p Depth.
  constexpr static NextUseDistance fromSize(unsigned Size, unsigned Depth) {
    return NextUseDistance(Size).applyLoopWeight(Depth);
  }

  // Implicit conversions from plain integers are intentional; they allow
  // expressions such as `NextUseDistance D = 0;`.
  constexpr NextUseDistance(unsigned V) : Value(V) {}
  constexpr NextUseDistance(int V) : Value(V) {}

  // Defaulted so the type stays trivially copyable.
  constexpr NextUseDistance(const NextUseDistance &B) = default;

  constexpr bool isUnreachable() const { return *this == unreachable(); }
  constexpr bool isReachable() const { return !isUnreachable(); }

  //----------------------------------------------------------------------------
  // Assignment
  //----------------------------------------------------------------------------
  constexpr NextUseDistance &operator=(const NextUseDistance &B) = default;

  constexpr NextUseDistance &operator=(unsigned V) {
    Value = V;
    return *this;
  }

  constexpr NextUseDistance &operator=(int V) {
    Value = V;
    return *this;
  }

  //----------------------------------------------------------------------------
  // Arithmetic operators
  //----------------------------------------------------------------------------
  constexpr NextUseDistance &operator+=(const NextUseDistance &B) {
    Value += B.Value;
    return *this;
  }

  constexpr NextUseDistance &operator-=(const NextUseDistance &B) {
    Value -= B.Value;
    return *this;
  }

  constexpr NextUseDistance operator-() const {
    return NextUseDistance(-Value);
  }

  /// Applies the weight of a single loop level. Equivalent to
  /// applyLoopWeight(1); delegating keeps the overflow handling in one place.
  constexpr NextUseDistance applyLoopWeight() const {
    return applyLoopWeight(1u);
  }

  //----------------------------------------------------------------------------
  // Comparison operators
  //----------------------------------------------------------------------------
  constexpr bool operator<(const NextUseDistance &B) const {
    return Value < B.Value;
  }

  constexpr bool operator>(const NextUseDistance &B) const {
    return Value > B.Value;
  }

  constexpr bool operator<=(const NextUseDistance &B) const {
    return Value <= B.Value;
  }

  constexpr bool operator>=(const NextUseDistance &B) const {
    return Value >= B.Value;
  }

  constexpr bool operator==(const NextUseDistance &B) const {
    return Value == B.Value;
  }

  constexpr bool operator!=(const NextUseDistance &B) const {
    return Value != B.Value;
  }

  //----------------------------------------------------------------------------
  // Debugging
  //----------------------------------------------------------------------------

  /// Formats the raw value as a decimal integer. PRId64 is used because a
  /// fixed "%ld" does not match int64_t on hosts where long is 32 bits wide.
  format_object<int64_t> fmt() const { return format("%" PRId64, Value); }

  /// Writes "<unreachable>" for the sentinel, otherwise the decimal value.
  void print(raw_ostream &OS) const {
    if (isUnreachable())
      OS << "<unreachable>";
    else
      OS << fmt();
  }

  /// JSON form: the string "<unreachable>" for the sentinel, otherwise the
  /// raw integer.
  json::Value toJsonValue() const {
    if (isUnreachable())
      return "<unreachable>";
    return Value;
  }

  /// Same text as print(), returned as a std::string.
  std::string toString() const {
    std::string Str;
    llvm::raw_string_ostream OS(Str);
    print(OS);
    return OS.str();
  }

  constexpr int64_t getRawValue() const { return Value; }

private:
  friend class AMDGPUNextUseAnalysisImpl;

  int64_t Value;

  // Explicit and private: only this class and the befriended implementation
  // may build a distance directly from a raw 64-bit value.
  constexpr explicit NextUseDistance(int64_t V) : Value(V) {}

  /// Weight of a loop nest of depth \p Depth: 2^(7 * Depth), or unreachable()
  /// once the shift would reach bit 63.
  constexpr static NextUseDistance fromLoopDepth(unsigned Depth) {
    constexpr unsigned BitsPerLevel = 7;
    // Smallest depth for which BitsPerLevel * Depth >= 63. Comparing Depth
    // (rather than the product) avoids unsigned wrap-around for huge depths,
    // which could otherwise slip past the saturation check.
    constexpr unsigned SaturationDepth = (63 + BitsPerLevel - 1) / BitsPerLevel;
    if (Depth >= SaturationDepth)
      return unreachable();
    // This implementation is multiplicative (f(a+b) == f(a) * f(b)) which we
    // take advantage of below in applyLoopWeight(Depth).
    return NextUseDistance(int64_t(1) << (BitsPerLevel * Depth));
  }

  // Semantically: apply fromLoopDepth(1) Depth times (compositional).
  //
  // Optimized to take advantage of multiplicative implementation of
  // fromLoopDepth - a single multiply by fromLoopDepth(Depth) gives the same
  // result. If fromLoopDepth is changed to a non-multiplicative formula,
  // replace the body with something like:
  //
  //   NextUseDistance D = *this;
  //   for (unsigned I = 0; I < Depth; ++I) {
  //     D = D.applyLoopWeight();
  //     if (D.isUnreachable())
  //       return unreachable();
  //   }
  //   return D;
  //
  // (and give the zero-argument applyLoopWeight() its own single-level body
  // again, since it currently forwards here).
  constexpr NextUseDistance applyLoopWeight(unsigned Depth) const {
    if (!Depth)
      return *this;
    NextUseDistance W = fromLoopDepth(Depth);
    if (W.isUnreachable())
      return unreachable();
    constexpr int64_t MaxVal = std::numeric_limits<int64_t>::max();
    // Overflow guard for Value * W.Value. For a negative Value the quotient
    // is non-positive, so the guard also yields unreachable() in that case.
    if (Value != 0 && W.Value > MaxVal / Value)
      return unreachable();
    return NextUseDistance(Value * W.Value);
  }
};
/// Sum of two distances. Works on the by-value copy of the left operand and
/// forwards to NextUseDistance::operator+=.
constexpr inline NextUseDistance operator+(NextUseDistance A,
                                           const NextUseDistance &B) {
  A += B;
  return A;
}
/// Difference of two distances. Works on the by-value copy of the left
/// operand and forwards to NextUseDistance::operator-=.
constexpr inline NextUseDistance operator-(NextUseDistance A,
                                           const NextUseDistance &B) {
  A -= B;
  return A;
}
/// Returns the smaller of the two distances; \p B when they compare equal.
constexpr inline NextUseDistance min(NextUseDistance A, NextUseDistance B) {
  if (A < B)
    return A;
  return B;
}
/// Returns the larger of the two distances; \p B when they compare equal.
constexpr inline NextUseDistance max(NextUseDistance A, NextUseDistance B) {
  if (A > B)
    return A;
  return B;
}
//==============================================================================
// AMDGPUNextUseAnalysis - Provides next-use distances for live registers or
// sub-registers at a given MachineInstruction suitable for making spilling
// decisions.
//==============================================================================
/// Public facade of the next-use analysis. The only data member is a
/// unique_ptr to AMDGPUNextUseAnalysisImpl, which is merely forward-declared
/// in this header. The constructor is private, so instances are created by
/// the pass classes befriended below.
class AMDGPUNextUseAnalysis {
friend class AMDGPUNextUseAnalysisLegacyPass;
friend class AMDGPUNextUseAnalysisPrinterLegacyPass;
friend class AMDGPUNextUseAnalysisPass;
friend class AMDGPUNextUseAnalysisPrinterPass;
// Opaque implementation object.
std::unique_ptr<AMDGPUNextUseAnalysisImpl> Impl;
// Private: reachable only through the friend pass classes above.
AMDGPUNextUseAnalysis(const MachineFunction *, const MachineLoopInfo *);
public:
// Move-only type. The move operations and the destructor are declared here
// and defined out of line; declaring a move constructor suppresses the
// implicit copy operations.
AMDGPUNextUseAnalysis(AMDGPUNextUseAnalysis &&Other);
~AMDGPUNextUseAnalysis();
AMDGPUNextUseAnalysis &operator=(AMDGPUNextUseAnalysis &&Other);
// Configuration flags for controlling the distance model. Defaults correspond
// to the Graphics preset.
struct Config {
// Count PHI instructions as having non-zero cost (distance and block
// size). When false, all PHIs share ID 0 and don't contribute to block
// size.
bool CountPhis = true;
// Restrict inter-block distances to forward-reachable paths only.
// When false, distances through back-edges are also considered.
bool ForwardOnly = true;
// Model PHI uses as belonging to their incoming edge's block, and apply
// full loop-aware reachability filtering including intermediate-def
// checks. When false, a simple same-block / forward-reachable check is
// used.
bool PreciseUseModeling = false;
// Promote uses that are inside a loop not yet entered or inside a directly
// nested inner loop to the end of that loop's preheader. This models the
// assumption that a spilled value will be reloaded at the preheader rather
// than at the actual use site. When false, direct shortest distance to the
// use is used instead.
bool PromoteToPreheader = false;
/// Named presets. See note in AMDGPUNextUseAnalysis.cpp associated with
/// 'amdgpu-next-use-analysis-config' regarding the historical context for
/// these.
// Graphics preset: exactly the default member initializers above.
static Config Graphics() { return {}; }
// Compute preset: every flag is the opposite of its default.
static Config Compute() {
Config Cfg;
Cfg.CountPhis = false;
Cfg.ForwardOnly = false;
Cfg.PreciseUseModeling = true;
Cfg.PromoteToPreheader = true;
return Cfg;
}
};
// Accessors for the active configuration (defined out of line).
Config getConfig() const;
void setConfig(Config);
// NOTE(review): judging by the name and the non-const reference parameter,
// this presumably collects into \p Uses the uses of \p LiveReg (restricted
// to \p LaneMask) that are reachable from \p MI - confirm against the
// implementation.
void getReachableUses(Register LiveReg, LaneBitmask LaneMask,
const MachineInstr &MI,
SmallVector<const MachineOperand *> &Uses) const;
/// \returns the shortest next-use distance from \p CurMI for \p LiveReg.
// \p ShortestUseOut and \p Distances are optional outputs (both default to
// nullptr). NOTE(review): presumably they receive the closest use and the
// per-use distances respectively - confirm against the implementation.
NextUseDistance
getShortestDistance(Register LiveReg, const MachineInstr &CurMI,
const SmallVector<const MachineOperand *> &Uses,
const MachineOperand **ShortestUseOut = nullptr,
SmallVector<NextUseDistance> *Distances = nullptr) const;
// A use operand paired with a distance. Default state: null use, distance 0.
struct UseDistancePair {
const MachineOperand *Use = nullptr;
NextUseDistance Dist = 0;
UseDistancePair() = default;
UseDistancePair(const MachineOperand *Use, NextUseDistance Dist)
: Use(Use), Dist(Dist) {}
};
// \p Furthest is a mandatory output; \p FurthestSubreg and \p RelevantUses
// are optional outputs (both default to nullptr).
// NOTE(review): presumably \p LiveRegs maps register numbers to live lane
// masks and \p Furthest receives the entry whose next use is farthest from
// \p MI - confirm against the implementation.
void getNextUseDistances(const DenseMap<unsigned, LaneBitmask> &LiveRegs,
const MachineInstr &MI, UseDistancePair &Furthest,
UseDistancePair *FurthestSubreg = nullptr,
DenseMap<const MachineOperand *, UseDistancePair>
*RelevantUses = nullptr) const;
};
//==============================================================================
// AMDGPUNextUseAnalysisLegacyPass - Legacy and New pass wrapper around
// AMDGPUNextUseAnalysis
//==============================================================================
/// Legacy pass manager wrapper: a MachineFunctionPass that owns an
/// AMDGPUNextUseAnalysis and exposes it through getNextUseAnalysis().
class AMDGPUNextUseAnalysisLegacyPass : public MachineFunctionPass {
public:
static char ID;
AMDGPUNextUseAnalysisLegacyPass();
// Both accessors dereference NUA without a null check.
// NOTE(review): NUA is presumably populated by runOnMachineFunction, so
// these should only be called after the pass has run - confirm in the .cpp.
AMDGPUNextUseAnalysis &getNextUseAnalysis() { return *NUA; }
const AMDGPUNextUseAnalysis &getNextUseAnalysis() const { return *NUA; }
StringRef getPassName() const override;
protected:
bool runOnMachineFunction(MachineFunction &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
// The owned analysis object returned by getNextUseAnalysis().
std::unique_ptr<AMDGPUNextUseAnalysis> NUA;
};
/// New pass manager analysis whose result type is AMDGPUNextUseAnalysis.
class AMDGPUNextUseAnalysisPass
: public AnalysisInfoMixin<AMDGPUNextUseAnalysisPass> {
// The mixin is befriended so it can reach the private static Key.
friend AnalysisInfoMixin<AMDGPUNextUseAnalysisPass>;
static AnalysisKey Key;
public:
using Result = AMDGPUNextUseAnalysis;
// Produces the analysis result for \p MF (defined out of line).
Result run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM);
};
//==============================================================================
// AMDGPUNextUseAnalysisPrinterLegacyPass - Legacy Pass for printing
// AMDGPUNextUseAnalysis results as JSON.
//==============================================================================
/// Legacy pass manager printer pass for the next-use analysis. All members
/// are defined out of line; this header only declares the pass interface.
class AMDGPUNextUseAnalysisPrinterLegacyPass : public MachineFunctionPass {
public:
static char ID;
AMDGPUNextUseAnalysisPrinterLegacyPass();
StringRef getPassName() const override;
protected:
bool runOnMachineFunction(MachineFunction &) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
/// New pass manager printer pass for the next-use analysis.
class AMDGPUNextUseAnalysisPrinterPass
: public PassInfoMixin<AMDGPUNextUseAnalysisPrinterPass> {
// Held by reference: the stream must outlive this pass object.
raw_ostream &OS;
public:
explicit AMDGPUNextUseAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
// Always reports true. In the new pass manager a required pass is not
// skipped by pass instrumentation.
static bool isRequired() { return true; }
};
} // namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H