//===- AMDGPUCoExecSchedStrategy.cpp - CoExec Scheduling Strategy ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Coexecution-focused scheduling strategy for AMDGPU.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUCoExecSchedStrategy.h"
#include "llvm/Support/Debug.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "machine-scheduler"

namespace {
// Used to disable post-RA scheduling with function-level granularity.
class GCNNoopPostScheduleDAG final : public ScheduleDAGInstrs {
public:
  explicit GCNNoopPostScheduleDAG(MachineSchedContext *C)
      : ScheduleDAGInstrs(*C->MF, C->MLI, /*RemoveKillFlags=*/true) {}

  // Do nothing.
  void schedule() override {}
};
} // namespace

static SUnit *pickOnlyChoice(SchedBoundary &Zone) {
  // pickOnlyChoice() releases pending instructions and checks for new hazards.
  SUnit *OnlyChoice = Zone.pickOnlyChoice();
  if (!Zone.Pending.empty())
    return nullptr;
  return OnlyChoice;
}

InstructionFlavor llvm::AMDGPU::classifyFlavor(const MachineInstr &MI,
                                               const SIInstrInfo &SII) {
  if (MI.isDebugInstr())
    return InstructionFlavor::Other;

  unsigned Opc = MI.getOpcode();
  // Check for specific opcodes first.
  if (Opc == AMDGPU::ATOMIC_FENCE || Opc == AMDGPU::S_WAIT_ASYNCCNT ||
      Opc == AMDGPU::S_WAIT_TENSORCNT || Opc == AMDGPU::S_BARRIER_WAIT ||
      Opc == AMDGPU::S_BARRIER_SIGNAL_IMM)
    return InstructionFlavor::Fence;

  if (SII.isLDSDMA(MI))
    return InstructionFlavor::DMA;
  if (SII.isMFMAorWMMA(MI))
    return InstructionFlavor::WMMA;
  if (SII.isTRANS(MI))
    return InstructionFlavor::TRANS;
  if (SII.isVALU(MI))
    return InstructionFlavor::SingleCycleVALU;
  if (SII.isDS(MI))
    return InstructionFlavor::DS;
  if (SII.isFLAT(MI) || SII.isFLATGlobal(MI) || SII.isFLATScratch(MI))
    return InstructionFlavor::VMEM;
  if (SII.isSALU(MI))
    return InstructionFlavor::SALU;
  return InstructionFlavor::Other;
}

SUnit *HardwareUnitInfo::getNextTargetSU(bool LookDeep) const {
  for (auto *PrioritySU : PrioritySUs) {
    if (!PrioritySU->isTopReady())
      return PrioritySU;
  }
  if (!LookDeep)
    return nullptr;
  unsigned MinDepth = std::numeric_limits<unsigned>::max();
  SUnit *TargetSU = nullptr;
  for (auto *SU : AllSUs) {
    if (SU->isScheduled)
      continue;
    if (SU->isTopReady())
      continue;
    if (SU->getDepth() < MinDepth) {
      MinDepth = SU->getDepth();
      TargetSU = SU;
    }
  }
  return TargetSU;
}

void HardwareUnitInfo::insert(SUnit *SU, unsigned BlockingCycles) {
#ifndef NDEBUG
  bool Inserted = AllSUs.insert(SU);
  assert(Inserted);
#else
  AllSUs.insert(SU);
#endif
  TotalCycles += BlockingCycles;
  if (PrioritySUs.empty()) {
    PrioritySUs.insert(SU);
    return;
  }
  unsigned SUDepth = SU->getDepth();
  unsigned CurrDepth = (*PrioritySUs.begin())->getDepth();
  if (SUDepth > CurrDepth)
    return;
  if (SUDepth == CurrDepth) {
    PrioritySUs.insert(SU);
    return;
  }
  // SU is lower depth and should be prioritized.
  PrioritySUs.clear();
  PrioritySUs.insert(SU);
}

void HardwareUnitInfo::markScheduled(SUnit *SU, unsigned BlockingCycles) {
  // We may want to ignore some HWUIs (e.g. InstructionFlavor::Other). To do
  // so, we just clear the HWUI. However, we still have instructions which map
  // to this HWUI. Don't bother managing the state for these HWUIs.
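  // Note: insert() and the rebuild below are intended to maintain the
  // invariant that PrioritySUs is exactly the set of tracked, unscheduled SUs
  // at the minimum depth for this unit, so getNextTargetSU() may treat any
  // member as the next blocker for the resource.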
  if (TotalCycles == 0)
    return;
  AllSUs.remove(SU);
  PrioritySUs.remove(SU);
  TotalCycles -= BlockingCycles;
  if (AllSUs.empty())
    return;
  if (PrioritySUs.empty()) {
    // Rebuild the priority set from the remaining SUs.
    for (auto *RemainingSU : AllSUs) {
      if (PrioritySUs.empty()) {
        PrioritySUs.insert(RemainingSU);
        continue;
      }
      unsigned SUDepth = RemainingSU->getDepth();
      unsigned CurrDepth = (*PrioritySUs.begin())->getDepth();
      if (SUDepth > CurrDepth)
        continue;
      if (SUDepth == CurrDepth) {
        PrioritySUs.insert(RemainingSU);
        continue;
      }
      // RemainingSU is lower depth and should be prioritized.
      PrioritySUs.clear();
      PrioritySUs.insert(RemainingSU);
    }
  }
}

HardwareUnitInfo *
CandidateHeuristics::getHWUIFromFlavor(InstructionFlavor Flavor) {
  for (auto &HWUICand : HWUInfo) {
    if (HWUICand.getType() == Flavor)
      return &HWUICand;
  }
  return nullptr;
}

unsigned CandidateHeuristics::getHWUICyclesForInst(SUnit *SU) {
  assert(SchedModel && SchedModel->hasInstrSchedModel());
  unsigned ReleaseAtCycle = 0;
  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC);
       PI != PE; ++PI)
    ReleaseAtCycle = std::max(ReleaseAtCycle, (unsigned)PI->ReleaseAtCycle);
  return ReleaseAtCycle;
}

void CandidateHeuristics::updateForScheduling(SUnit *SU) {
  HardwareUnitInfo *HWUI =
      getHWUIFromFlavor(classifyFlavor(*SU->getInstr(), *SII));
  assert(HWUI);
  HWUI->markScheduled(SU, getHWUICyclesForInst(SU));
}

void CandidateHeuristics::initialize(ScheduleDAGMI *SchedDAG,
                                     const TargetSchedModel *TargetSchedModel,
                                     const TargetRegisterInfo *TRI) {
  DAG = SchedDAG;
  SchedModel = TargetSchedModel;
  assert(SchedModel && SchedModel->hasInstrSchedModel());
  SRI = static_cast<const SIRegisterInfo *>(TRI);
  SII = static_cast<const SIInstrInfo *>(DAG->TII);
  HWUInfo.resize((int)InstructionFlavor::NUM_FLAVORS);
  for (unsigned I = 0; I < HWUInfo.size(); I++) {
    HWUInfo[I].reset();
    HWUInfo[I].setType(static_cast<InstructionFlavor>(I));
  }
  HWUInfo[(int)InstructionFlavor::WMMA].setProducesCoexecWindow(true);
  HWUInfo[(int)InstructionFlavor::MultiCycleVALU].setProducesCoexecWindow(
      true);
  HWUInfo[(int)InstructionFlavor::TRANS].setProducesCoexecWindow(true);
  collectHWUIPressure();
}

void CandidateHeuristics::collectHWUIPressure() {
  if (!SchedModel || !SchedModel->hasInstrSchedModel())
    return;
  for (auto &SU : DAG->SUnits) {
    const InstructionFlavor Flavor = classifyFlavor(*SU.getInstr(), *SII);
    HWUInfo[(int)Flavor].insert(&SU, getHWUICyclesForInst(&SU));
  }
  LLVM_DEBUG(dumpRegionSummary());
}

void CandidateHeuristics::dumpRegionSummary() {
  MachineBasicBlock *BB = DAG->begin()->getParent();
  dbgs() << "\n=== Region: " << DAG->MF.getName() << " BB" << BB->getNumber()
         << " (" << DAG->SUnits.size() << " SUs) ===\n";
  dbgs() << "\nHWUI Resource Pressure:\n";
  for (auto &HWUI : HWUInfo) {
    if (HWUI.getTotalCycles() == 0)
      continue;
    StringRef Name = getFlavorName(HWUI.getType());
    dbgs() << "  " << Name << ": " << HWUI.getTotalCycles() << " cycles, "
           << HWUI.size() << " instrs\n";
  }
  dbgs() << "\n";
}

void CandidateHeuristics::sortHWUIResources() {
  // Highest priority should be first.
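  // For example (hypothetical region): a WMMA unit with 300 demanded cycles,
  // a VMEM unit with 900, and a SALU unit with 40 sort as WMMA, VMEM, SALU.
  // WMMA comes first despite its lower demand because coexec-window
  // producers outrank raw cycle counts.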
  llvm::sort(HWUInfo, [](HardwareUnitInfo &A, HardwareUnitInfo &B) {
    // Prefer coexec-window producers.
    if (A.producesCoexecWindow() != B.producesCoexecWindow())
      return A.producesCoexecWindow();
    // Prefer more demanded resources.
    if (A.getTotalCycles() != B.getTotalCycles())
      return A.getTotalCycles() > B.getTotalCycles();
    // In ties, prefer the resource whose demand is concentrated in fewer
    // (i.e. longer-blocking) instructions.
    if (A.size() != B.size())
      return A.size() < B.size();
    // Default to Flavor order.
    return (unsigned)A.getType() < (unsigned)B.getType();
  });
}

bool CandidateHeuristics::tryCriticalResourceDependency(
    GenericSchedulerBase::SchedCandidate &TryCand,
    GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const {
  auto HasPrioritySU = [this, &Cand, &TryCand](unsigned ResourceIdx) {
    const HardwareUnitInfo &HWUI = HWUInfo[ResourceIdx];
    auto CandFlavor = classifyFlavor(*Cand.SU->getInstr(), *SII);
    auto TryCandFlavor = classifyFlavor(*TryCand.SU->getInstr(), *SII);
    bool LookDeep = (CandFlavor == InstructionFlavor::DS ||
                     TryCandFlavor == InstructionFlavor::DS) &&
                    HWUI.getType() == InstructionFlavor::WMMA;
    // If we do not have a TargetSU for this resource, it is not critical.
    return HWUI.getNextTargetSU(LookDeep) != nullptr;
  };

  auto TryEnablesResource = [&Cand, &TryCand, this](unsigned ResourceIdx) {
    const HardwareUnitInfo &HWUI = HWUInfo[ResourceIdx];
    auto CandFlavor = classifyFlavor(*Cand.SU->getInstr(), *SII);
    // We want to ensure our DS order matches WMMA order.
    bool LookDeep = CandFlavor == InstructionFlavor::DS &&
                    HWUI.getType() == InstructionFlavor::WMMA;
    auto *TargetSU = HWUI.getNextTargetSU(LookDeep);
    // LookDeep may differ from the query in HasPrioritySU, so guard against a
    // missing target here as well.
    if (!TargetSU)
      return false;
    bool CandEnables =
        TargetSU != Cand.SU && DAG->IsReachable(TargetSU, Cand.SU);
    bool TryCandEnables =
        TargetSU != TryCand.SU && DAG->IsReachable(TargetSU, TryCand.SU);
    if (!CandEnables && !TryCandEnables)
      return false;
    if (CandEnables && !TryCandEnables) {
      if (Cand.Reason > GenericSchedulerBase::RegCritical)
        Cand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }
    if (!CandEnables && TryCandEnables) {
      TryCand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }
    // Both enable; prefer the critical path.
    unsigned CandHeight = Cand.SU->getHeight();
    unsigned TryCandHeight = TryCand.SU->getHeight();
    if (CandHeight > TryCandHeight) {
      if (Cand.Reason > GenericSchedulerBase::RegCritical)
        Cand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }
    if (CandHeight < TryCandHeight) {
      TryCand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }
    // Same critical path, just prefer the original candidate.
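    // Keeping the incumbent here makes the comparison stable: repeated
    // evaluations of equally good candidates do not flip the pick.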
    if (Cand.Reason > GenericSchedulerBase::RegCritical)
      Cand.Reason = GenericSchedulerBase::RegCritical;
    return true;
  };

  for (unsigned I = 0; I < HWUInfo.size(); I++) {
    // Skip resources with no pending target SU; they are not critical.
    if (!HasPrioritySU(I))
      continue;
    // If a preference was established for this resource, we are done.
    // Otherwise, continue to the next resource.
    if (TryEnablesResource(I))
      return true;
  }
  return false;
}

bool CandidateHeuristics::tryCriticalResource(
    GenericSchedulerBase::SchedCandidate &TryCand,
    GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary *Zone) const {
  for (unsigned I = 0; I < HWUInfo.size(); I++) {
    const HardwareUnitInfo &HWUI = HWUInfo[I];
    bool CandUsesCrit = HWUI.contains(Cand.SU);
    bool TryCandUsesCrit = HWUI.contains(TryCand.SU);
    if (!CandUsesCrit && !TryCandUsesCrit)
      continue;
    if (CandUsesCrit != TryCandUsesCrit) {
      if (CandUsesCrit) {
        if (Cand.Reason > GenericSchedulerBase::RegCritical)
          Cand.Reason = GenericSchedulerBase::RegCritical;
        return true;
      }
      TryCand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }
    // Otherwise, both use the critical resource.
    // For longer-latency InstructionFlavors, prioritize first by their
    // enablement of critical resources.
    if (HWUI.getType() == InstructionFlavor::DS) {
      if (tryCriticalResourceDependency(TryCand, Cand, Zone))
        return true;
    }
    // Prioritize based on HWUI priorities.
    SUnit *Match = HWUI.getHigherPriority(Cand.SU, TryCand.SU);
    if (Match) {
      if (Match == Cand.SU) {
        if (Cand.Reason > GenericSchedulerBase::RegCritical)
          Cand.Reason = GenericSchedulerBase::RegCritical;
        return true;
      }
      TryCand.Reason = GenericSchedulerBase::RegCritical;
      return true;
    }
  }
  return false;
}

AMDGPUCoExecSchedStrategy::AMDGPUCoExecSchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
  // Use more accurate GCN pressure trackers.
  UseGCNTrackers = true;
}

void AMDGPUCoExecSchedStrategy::initPolicy(MachineBasicBlock::iterator Begin,
                                           MachineBasicBlock::iterator End,
                                           unsigned NumRegionInstrs) {
  GCNSchedStrategy::initPolicy(Begin, End, NumRegionInstrs);
  assert((PreRADirection == MISched::Unspecified ||
          PreRADirection == MISched::TopDown) &&
         "coexec scheduler only supports top-down scheduling");
  RegionPolicy.OnlyTopDown = true;
  RegionPolicy.OnlyBottomUp = false;
  RegionPolicy.ShouldTrackLaneMasks = true;
}

void AMDGPUCoExecSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  // Coexecution scheduling strategy is only done top-down to support the new
  // resource balancing heuristics.
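  // (The heuristics consume top-boundary state only: SUnit depths,
  // TopReadyCycle, and the downward GCN pressure tracker.)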
  RegionPolicy.OnlyTopDown = true;
  RegionPolicy.OnlyBottomUp = false;
  GCNSchedStrategy::initialize(DAG);
  Heurs.initialize(DAG, SchedModel, TRI);
}

void AMDGPUCoExecSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  Heurs.updateForScheduling(SU);
  GCNSchedStrategy::schedNode(SU, IsTopNode);
}

SUnit *AMDGPUCoExecSchedStrategy::pickNode(bool &IsTopNode) {
  assert(RegionPolicy.OnlyTopDown && !RegionPolicy.OnlyBottomUp &&
         "coexec scheduler only supports top-down scheduling");
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  bool PickedPending = false;
  SUnit *SU = nullptr;
#ifndef NDEBUG
  SchedCandidate *PickedCand = nullptr;
#endif
  do {
    PickedPending = false;
    SU = pickOnlyChoice(Top);
    if (!SU) {
      CandPolicy NoPolicy;
      TopCand.reset(NoPolicy);
      pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
                        PickedPending, /*IsBottomUp=*/false);
      assert(TopCand.Reason != NoCand && "failed to find a candidate");
      SU = TopCand.SU;
#ifndef NDEBUG
      PickedCand = &TopCand;
#endif
    }
    IsTopNode = true;
  } while (SU->isScheduled);

  LLVM_DEBUG(if (PickedCand) dumpPickSummary(SU, IsTopNode, *PickedCand));

  if (PickedPending) {
    unsigned ReadyCycle = SU->TopReadyCycle;
    unsigned CurrentCycle = Top.getCurrCycle();
    if (ReadyCycle > CurrentCycle)
      Top.bumpCycle(ReadyCycle);
    // checkHazard() does not expose the exact cycle where the hazard clears.
    while (Top.checkHazard(SU))
      Top.bumpCycle(Top.getCurrCycle() + 1);
    Top.releasePending();
  }

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  assert(IsTopNode && "coexec scheduler must only schedule from top boundary");
  return SU;
}

void AMDGPUCoExecSchedStrategy::pickNodeFromQueue(
    SchedBoundary &Zone, const CandPolicy &ZonePolicy,
    const RegPressureTracker &RPTracker, SchedCandidate &Cand,
    bool &PickedPending, bool IsBottomUp) {
  assert(Zone.isTop() && "coexec scheduler only supports top boundary");
  assert(!IsBottomUp && "coexec scheduler only supports top-down scheduling");
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  PickedPending = false;
  if (DAG->isTrackingPressure()) {
    if (!useGCNTrackers()) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      SGPRPressure = DownwardTracker.getPressure().getSGPRNum();
      VGPRPressure = DownwardTracker.getPressure().getArchVGPRNum();
    }
  }
  auto EvaluateQueue = [&](ReadyQueue &Q, bool FromPending) {
    for (SUnit *SU : Q) {
      SchedCandidate TryCand(ZonePolicy);
      initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                    VGPRPressure, IsBottomUp);
      SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
      tryCandidateCoexec(Cand, TryCand, ZoneArg);
      if (TryCand.Reason != NoCand) {
        if (TryCand.ResDelta == SchedResourceDelta())
          TryCand.initResourceDelta(Zone.DAG, SchedModel);
        LLVM_DEBUG(printCandidateDecision(Cand, TryCand));
        PickedPending = FromPending;
        Cand.setBest(TryCand);
      } else {
        LLVM_DEBUG(printCandidateDecision(TryCand, Cand));
      }
    }
  };
  LLVM_DEBUG(dbgs() << "Available Q:\n");
  EvaluateQueue(Zone.Available, /*FromPending=*/false);
  LLVM_DEBUG(dbgs() << "Pending Q:\n");
  EvaluateQueue(Zone.Pending, /*FromPending=*/true);
}

#ifndef NDEBUG
void AMDGPUCoExecSchedStrategy::dumpPickSummary(SUnit *SU, bool IsTopNode,
                                                SchedCandidate &Cand) {
  const SIInstrInfo *SII = static_cast<const SIInstrInfo *>(DAG->TII);
  unsigned Cycle = IsTopNode ? Top.getCurrCycle() : Bot.getCurrCycle();
  dbgs() << "=== Pick @ Cycle " << Cycle << " ===\n";
  const InstructionFlavor Flavor = classifyFlavor(*SU->getInstr(), *SII);
  dbgs() << "Picked: SU(" << SU->NodeNum << ") ";
  SU->getInstr()->print(dbgs(), /*IsStandalone=*/true, /*SkipOpers=*/false,
                        /*SkipDebugLoc=*/true);
  dbgs() << " [" << getFlavorName(Flavor) << "]\n";
  dbgs() << " Reason: ";
  if (LastAMDGPUReason != AMDGPUSchedReason::None)
    dbgs() << getReasonName(LastAMDGPUReason);
  else if (Cand.Reason != NoCand)
    dbgs() << GenericSchedulerBase::getReasonStr(Cand.Reason);
  else
    dbgs() << "Unknown";
  dbgs() << "\n\n";
  LastAMDGPUReason = AMDGPUSchedReason::None;
}
#endif

bool AMDGPUCoExecSchedStrategy::tryCandidateCoexec(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = FirstValid;
    return true;
  }

  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary. Some properties are simply incomparable; in many
  // other instances we should only override the other boundary if something
  // is a clearly good pick on one boundary. Skip heuristics that are more
  // "tie-breaking" in nature.
  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Compare candidates by the stall they would introduce if scheduled in
    // the current cycle.
    if (tryEffectiveStall(Cand, TryCand, *Zone))
      return TryCand.Reason != NoCand;
    Heurs.sortHWUIResources();
    if (Heurs.tryCriticalResource(TryCand, Cand, Zone)) {
      LastAMDGPUReason = AMDGPUSchedReason::CritResourceBalance;
      return TryCand.Reason != NoCand;
    }
    if (Heurs.tryCriticalResourceDependency(TryCand, Cand, Zone)) {
      LastAMDGPUReason = AMDGPUSchedReason::CritResourceDep;
      return TryCand.Reason != NoCand;
    }
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Avoid serializing long latency dependence chains.
    // For acyclic path limited loops, latency was already checked above.
    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}

bool AMDGPUCoExecSchedStrategy::tryEffectiveStall(SchedCandidate &Cand,
                                                  SchedCandidate &TryCand,
                                                  SchedBoundary &Zone) const {
  // Treat structural and latency stalls as a single scheduling cost for the
  // current cycle.
  struct StallCosts {
    unsigned Ready = 0;
    unsigned Structural = 0;
    unsigned Latency = 0;
    unsigned Effective = 0;
  };
  unsigned CurrCycle = Zone.getCurrCycle();
  auto GetStallCosts = [&](SUnit *SU) {
    unsigned ReadyCycle = Zone.isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
    StallCosts Costs;
    Costs.Ready = ReadyCycle > CurrCycle ? ReadyCycle - CurrCycle : 0;
    Costs.Structural = getStructuralStallCycles(Zone, SU);
    Costs.Latency = Zone.getLatencyStallCycles(SU);
    Costs.Effective = std::max({Costs.Ready, Costs.Structural, Costs.Latency});
    return Costs;
  };
  StallCosts TryCosts = GetStallCosts(TryCand.SU);
  StallCosts CandCosts = GetStallCosts(Cand.SU);
  LLVM_DEBUG(if (TryCosts.Effective || CandCosts.Effective) {
    dbgs() << "Effective stalls: try=" << TryCosts.Effective
           << " (ready=" << TryCosts.Ready
           << ", struct=" << TryCosts.Structural
           << ", lat=" << TryCosts.Latency << ") cand=" << CandCosts.Effective
           << " (ready=" << CandCosts.Ready
           << ", struct=" << CandCosts.Structural
           << ", lat=" << CandCosts.Latency << ")\n";
  });
  return tryLess(TryCosts.Effective, CandCosts.Effective, TryCand, Cand,
                 Stall);
}

ScheduleDAGInstrs *
llvm::createGCNCoExecMachineScheduler(MachineSchedContext *C) {
  LLVM_DEBUG(dbgs() << "AMDGPU coexec preRA scheduler selected for "
                    << C->MF->getName() << '\n');
  return new GCNScheduleDAGMILive(
      C, std::make_unique<AMDGPUCoExecSchedStrategy>(C));
}

ScheduleDAGInstrs *
llvm::createGCNNoopPostMachineScheduler(MachineSchedContext *C) {
  LLVM_DEBUG(dbgs() << "AMDGPU no-op postRA scheduler selected for "
                    << C->MF->getName() << '\n');
  return new GCNNoopPostScheduleDAG(C);
}
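// A minimal sketch of how these entry points are typically exposed, assuming
// the standard MachineSchedRegistry mechanism; the registry name and
// description below are illustrative, not defined in this file:
//
//   static MachineSchedRegistry
//       GCNCoExecSchedRegistry("gcn-coexec",
//                              "Run the AMDGPU coexecution scheduler",
//                              createGCNCoExecMachineScheduler);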