//===- RematerializerTest.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Rematerializer.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/CodeGen/MIRParser/MIRParser.h" #include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Module.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetMachine.h" #include "gtest/gtest.h" #include using namespace llvm; using RegisterIdx = Rematerializer::RegisterIdx; class RematerializerTest : public testing::Test { public: LLVMContext Context; std::unique_ptr TM; std::unique_ptr M; std::unique_ptr MMI; std::unique_ptr MIR; std::unique_ptr> Regions; std::unique_ptr Remater; MachineFunction *MF; LoopAnalysisManager LAM; MachineFunctionAnalysisManager MFAM; FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM; ModulePassManager MPM; FunctionPassManager FPM; MachineFunctionPassManager MFPM; ModuleAnalysisManager MAM; static void SetUpTestCase() { InitializeAllTargets(); InitializeAllTargetMCs(); } void SetUp() override { Triple TargetTriple("amdgcn--"); std::string Error; const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); if (!T) GTEST_SKIP(); TargetOptions Options; TM = 
std::unique_ptr(T->createTargetMachine( TargetTriple, "gfx950", "", Options, std::nullopt)); if (!TM) GTEST_SKIP(); MMI = std::make_unique(TM.get()); PassBuilder PB(TM.get()); PB.registerModuleAnalyses(MAM); PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); PB.registerLoopAnalyses(LAM); PB.registerMachineFunctionAnalyses(MFAM); PB.crossRegisterProxies(LAM, FAM, CGAM, MAM, &MFAM); MAM.registerPass([&] { return MachineModuleAnalysis(*MMI); }); } bool parseMIRAndInit(StringRef MIRCode, StringRef FunName) { SMDiagnostic Diagnostic; std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRCode); MIR = createMIRParser(std::move(MBuffer), Context); if (!MIR) return false; M = MIR->parseIRModule(); M->setDataLayout(TM->createDataLayout()); if (MIR->parseMachineFunctions(*M, MAM)) { M.reset(); return false; } MF = &FAM.getResult(*M->getFunction(FunName)) .getMF(); LiveIntervals &LIS = MFAM.getResult(*MF); // Create regions for the rematerializer. Both MBBs and terminator MIs // delimitate regions. Regions = std::make_unique>(); MachineInstr *FirstMI = nullptr; for (MachineBasicBlock &MBB : *MF) { for (MachineInstr &MI : MBB) { if (!FirstMI) FirstMI = &MI; if (MI.isTerminator()) { if (FirstMI != &MI) Regions->push_back({FirstMI, MI}); FirstMI = nullptr; } } // End the region at the end of the block. if (FirstMI) { Regions->push_back({FirstMI, MBB.end()}); FirstMI = nullptr; } } Remater = std::make_unique(*MF, *Regions, LIS); Remater->analyze(); return true; } MachineFunction &getMF() { return *MF; } Rematerializer &getRematerializer() { return *Remater; } /// Returns the number of users of register \p RegIdx. unsigned getNumUsers(RegisterIdx RegIdx) { unsigned NumUsers = 0; for (const auto &[_, RegionUses] : Remater->getReg(RegIdx).Uses) NumUsers += RegionUses.size(); return NumUsers; } /// Returns the size of region \p RegionIdx. 
unsigned getRegionSize(unsigned RegionIdx) { const Rematerializer::RegionBoundaries &Region = (*Regions)[RegionIdx]; return std::distance(Region.first, Region.second); } }; /// Asserts that region RegionIdx contains RegionSize instructions. #define ASSERT_REGION_SIZE(RegionIdx, RegionSize) \ ASSERT_EQ(getRegionSize(RegionIdx), RegionSize) /// Asserts that regions have sizes RegionSizes, which must be an iterable /// object with the same number of elements as the number of regions. #define ASSERT_REGION_SIZES(RegionSizes) \ { \ ASSERT_EQ(RegionSizes.size(), Regions->size()); \ for (const auto [RegionIdx, ExpectedSize] : enumerate(RegionSizes)) \ ASSERT_REGION_SIZE(RegionIdx, ExpectedSize); \ } /// Expects that register RegIdx in the rematerializer has a total of N users. #define EXPECT_NUM_USERS(RegIdx, N) \ EXPECT_EQ(getNumUsers(RegIdx), static_cast(N)) /// Expects that register RegIdx in the remterializer hsa no users. #define EXPECT_NO_USERS(RegIdx) EXPECT_NUM_USERS(RegIdx, 0) /// Expects that rematerialized register RegIdx has origin OriginIdx, is defined /// in region DefRegionIdx, and has a total of NumUsers users. #define EXPECT_REMAT(RegIdx, OriginIdx, DefRegionIdx, NumUsers) \ { \ const Rematerializer::Reg &RematReg = Remater.getReg(RegIdx); \ EXPECT_EQ(Remater.getOriginOf(RegIdx), OriginIdx); \ EXPECT_EQ(RematReg.DefRegion, DefRegionIdx); \ EXPECT_NUM_USERS(RegIdx, NumUsers); \ } /// Rematerializes a tree of registers to a single user in different ways using /// the dependency reuse mechanics and the coarse-grained or more fine-grained /// API. Rollback rematerializations in-between each different wave of /// rematerializations. 
TEST_F(RematerializerTest, TreeRematRollback) {
  StringRef MIR = R"(
name: TreeRematRollback
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
    %2:vgpr_32 = V_ADD_U32_e32 %0, %1, implicit $exec
    %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
    %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
  bb.1:
    S_NOP 0, implicit %4
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "TreeRematRollback"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;
  Rollbacker Rollbacker;
  Remater.addListener(&Rollbacker);

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1;
  SmallVector<unsigned> RegionSizes{5, 1};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers.
  unsigned NumRegs = 0;
  const RegisterIdx Cst0 = NumRegs++, Cst1 = NumRegs++, Add01 = NumRegs++,
                    Cst3 = NumRegs++, Add23 = NumRegs++;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);

  // Rematerialize Add23 with all transitive dependencies.
  {
    Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1, DRI);
    Remater.updateLiveIntervals();

    // None of the original registers have any users left.
    EXPECT_NO_USERS(Cst0);
    EXPECT_NO_USERS(Cst1);
    EXPECT_NO_USERS(Add01);
    EXPECT_NO_USERS(Cst3);
    EXPECT_NO_USERS(Add23);

    // Copies of all MIs were inserted into the second MBB. Original registers
    // were deleted.
    RegionSizes[MBB0] -= 5;
    RegionSizes[MBB1] += 5;
    ASSERT_REGION_SIZES(RegionSizes);
    NumRegs += 5;
    ASSERT_EQ(Remater.getNumRegs(), NumRegs);
  }

  // After rollback all rematerializations are removed from the MIR.
  Rollbacker.rollback(Remater);
  RegionSizes[MBB0] += 5;
  RegionSizes[MBB1] -= 5;
  ASSERT_REGION_SIZES(RegionSizes);

  // Rematerialize Add23 only with its direct dependencies, reuse the rest.
  {
    DRI.clear().reuse(Cst0).reuse(Cst1);
    Remater.rematerializeToRegion(/*RootIdx=*/Add23, /*UseRegion=*/MBB1, DRI);
    Remater.updateLiveIntervals();

    // Re-used registers have rematerializations as their single user (original
    // users are dead). Rematerialized registers have no users.
    EXPECT_NUM_USERS(Cst0, 1);
    EXPECT_NUM_USERS(Cst1, 1);
    EXPECT_NO_USERS(Add01);
    EXPECT_NO_USERS(Cst3);
    EXPECT_NO_USERS(Add23);

    // Only immediate dependencies are copied to the second MBB.
    RegionSizes[MBB0] -= 3;
    RegionSizes[MBB1] += 3;
    ASSERT_REGION_SIZES(RegionSizes);
    NumRegs += 3;
    ASSERT_EQ(Remater.getNumRegs(), NumRegs);
  }

  // After rollback all rematerializations are removed from the MIR.
  Rollbacker.rollback(Remater);
  RegionSizes[MBB0] += 3;
  RegionSizes[MBB1] -= 3;
  ASSERT_REGION_SIZES(RegionSizes);

  // Rematerialize Add23 only with its direct dependencies as before, but
  // with as fine-grained operations as possible.
  {
    MachineInstr *NopMI = &*(*Regions)[MBB1].first;

    DRI.clear().reuse(Cst0).reuse(Cst1);
    const RegisterIdx RematAdd01 =
        Remater.rematerializeToPos(Add01, MBB1, NopMI, DRI);
    // This adds an additional user to the used constants, and does not change
    // existing users for the original register.
    EXPECT_NO_USERS(RematAdd01);
    EXPECT_NUM_USERS(Add01, 1);
    EXPECT_NUM_USERS(Cst0, 2);
    EXPECT_NUM_USERS(Cst1, 2);

    DRI.clear();
    const RegisterIdx RematCst3 =
        Remater.rematerializeToPos(Cst3, MBB1, NopMI, DRI);
    // This does not change existing users for the original register.
    EXPECT_NO_USERS(RematCst3);
    EXPECT_NUM_USERS(Cst3, 1);

    DRI.clear().useRemat(Add01, RematAdd01).useRemat(Cst3, RematCst3);
    const RegisterIdx RematAdd23 =
        Remater.rematerializeToPos(Add23, MBB1, NopMI, DRI);
    // This adds a user to used rematerializations, and does not change existing
    // users for the original register.
    EXPECT_NO_USERS(RematAdd23);
    EXPECT_NUM_USERS(Add23, 1);
    EXPECT_NUM_USERS(RematAdd01, 1);
    EXPECT_NUM_USERS(RematCst3, 1);

    // Finally transfer the NOP user from the original to the rematerialized
    // register.
    Remater.transferUser(Add23, RematAdd23, MBB1, *NopMI);
    EXPECT_NO_USERS(Add23);
    EXPECT_NUM_USERS(RematAdd23, 1);

    RegionSizes[MBB0] -= 3;
    RegionSizes[MBB1] += 3;
    ASSERT_REGION_SIZES(RegionSizes);
    NumRegs += 3;
    ASSERT_EQ(Remater.getNumRegs(), NumRegs);
  }

  // This time don't rollback.
  Remater.updateLiveIntervals();
  EXPECT_TRUE(getMF().verify());
}

/// To rematerialize %3 along with all its dependencies before its only use in
/// bb.1, we must first rematerialize %0 and %1 (in any order), then %2, and
/// finally %3. The rematerializer had a rematerialization order bug wherein,
/// because %0 is also used directly in the MI defining %3, it was
/// rematerialized after %2, breaking the invariant that dependencies of a
/// register must always be rematerialized before the register itself.
TEST_F(RematerializerTest, MultiplePathsRematOrder) {
  StringRef MIR = R"(
name: MultiplePathsRematOrder
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
    %2:vgpr_32 = V_ADD_U32_e32 %0, %1, implicit $exec
    %3:vgpr_32 = V_ADD_U32_e32 %0, %2, implicit $exec
  bb.1:
    S_NOP 0, implicit %3
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "MultiplePathsRematOrder"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;

  const unsigned MBB1 = 1;
  const RegisterIdx Add02 = 3;

  // This call would previously fail.
  Remater.rematerializeToRegion(Add02, MBB1, DRI);
  Remater.updateLiveIntervals();
  EXPECT_TRUE(getMF().verify());
}

/// Rematerializes a single register to multiple regions, tracking that
/// rematerializations are linked correctly and making sure that the original
/// register is deleted automatically when it no longer has any uses.
TEST_F(RematerializerTest, MultiRegionsRemat) {
  StringRef MIR = R"(
name: MultiRegionsRemat
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
  bb.1:
    S_NOP 0, implicit %0, implicit %0
  bb.2:
    S_NOP 0, implicit %0
    S_NOP 0, implicit %0
  bb.3:
    S_NOP 0, implicit %0
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "MultiRegionsRemat"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2, MBB3 = 3;
  SmallVector<unsigned> RegionSizes{1, 1, 2, 1};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers.
  const RegisterIdx Cst0 = 0;
  ASSERT_EQ(Remater.getNumRegs(), 1U);

  // Rematerialization to MBB1.
  const RegisterIdx RematBB1 =
      Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB1, DRI);
  ++RegionSizes[MBB1];
  ASSERT_REGION_SIZES(RegionSizes);
  EXPECT_REMAT(/*RegIdx=*/RematBB1, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
               /*NumUsers=*/1);

  // Rematerialization to MBB2.
  DRI.clear();
  const RegisterIdx RematBB2 =
      Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB2, DRI);
  ++RegionSizes[MBB2];
  ASSERT_REGION_SIZES(RegionSizes);
  EXPECT_REMAT(/*RegIdx=*/RematBB2, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB2,
               /*NumUsers=*/2);

  // Rematerialization to MBB3. Rematerializing to the last original user
  // deletes the original register.
  DRI.clear();
  const RegisterIdx RematBB3 =
      Remater.rematerializeToRegion(/*RootIdx=*/Cst0, /*UseRegion=*/MBB3, DRI);
  --RegionSizes[MBB0];
  ++RegionSizes[MBB3];
  ASSERT_REGION_SIZES(RegionSizes);
  EXPECT_REMAT(/*RegIdx=*/RematBB3, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB3,
               /*NumUsers=*/1);

  Remater.updateLiveIntervals();
  EXPECT_TRUE(getMF().verify());
}

/// Rematerializes a tree of register with some unrematerializable operands to a
/// final destination in two steps, creating rematerializations of
/// rematerializations in the process. Make sure that origins of
/// rematerializations are always original registers.
TEST_F(RematerializerTest, MultiStep) {
  StringRef MIR = R"(
name: MultiStep
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
    %2:vgpr_32 = V_ADD_U32_e32 %0, %1, implicit $exec
    S_NOP 0, implicit %0
  bb.1:
    %3:vgpr_32 = V_ADD_U32_e32 %2, %2, implicit $exec
  bb.2:
    S_NOP 0, implicit %3
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "MultiStep"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2;
  SmallVector<unsigned> RegionSizes{4, 1, 1};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers.
  unsigned NumRegs = 0;
  const RegisterIdx Cst0 = NumRegs++, Add01 = NumRegs++, Add22 = NumRegs++;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);

  // Rematerialize Add01 from the first to the second block along with its
  // single rematerializable dependency (constant 0). The constant 1 has an
  // implicit def that is non-ignorable so it cannot be rematerialized. The
  // constant 0 remains in the first block because it has a user there, but the
  // add is deleted.
  Remater.rematerializeToRegion(/*RootIdx=*/Add01, /*UseRegion=*/MBB1, DRI);
  const RegisterIdx RematCst0 = NumRegs++, RematAdd01 = NumRegs++;
  RegionSizes[MBB0] -= 1;
  RegionSizes[MBB1] += 2;
  ASSERT_REGION_SIZES(RegionSizes);
  EXPECT_REMAT(/*RegIdx=*/RematCst0, /*OriginIdx=*/Cst0, /*DefRegionIdx=*/MBB1,
               /*NumUsers=*/1);
  EXPECT_REMAT(/*RegIdx=*/RematAdd01, /*OriginIdx=*/Add01,
               /*DefRegionIdx=*/MBB1, /*NumUsers=*/1);

  // We are going to re-rematerialize a register so the LIS need to be
  // up-to-date.
  Remater.updateLiveIntervals();

  // Rematerialize Add22 from the second to the third block, which will
  // also indirectly rematerialize RematAdd01; make sure the latter's
  // rematerializations's origin is the original register, not RematAdd01.
  DRI.clear().reuse(RematCst0);
  Remater.rematerializeToRegion(/*RootIdx=*/Add22, /*UseRegion=*/MBB2, DRI);
  const RegisterIdx RematRematAdd01 = NumRegs++, RematAdd22 = NumRegs++;
  RegionSizes[MBB1] -= 2;
  RegionSizes[MBB2] += 2;
  ASSERT_REGION_SIZES(RegionSizes);
  EXPECT_REMAT(/*RegIdx=*/RematRematAdd01, /*OriginIdx=*/Add01,
               /*DefRegionIdx=*/MBB2, /*NumUsers=*/1);
  EXPECT_REMAT(/*RegIdx=*/RematAdd22, /*OriginIdx=*/Add22,
               /*DefRegionIdx=*/MBB2, /*NumUsers=*/1);

  Remater.updateLiveIntervals();
  EXPECT_TRUE(getMF().verify());
}

/// Checks that it is possible to rematerialize inside a region that was
/// rendered empty by previous rematerializations (as long as the region ends
/// with a terminator).
TEST_F(RematerializerTest, EmptyRegion) {
  StringRef MIR = R"(
name: EmptyRegion
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
  bb.1:
    %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
  bb.2:
    %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
    S_BRANCH %bb.3
  bb.3:
    S_NOP 0, implicit %0, implicit %1
    S_NOP 0, implicit %2, implicit %3
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "EmptyRegion"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2, MBB3 = 3;
  SmallVector<unsigned> RegionSizes{2, 1, 1, 2};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers.
  unsigned NumRegs = 0;
  const RegisterIdx Cst0 = NumRegs++, Cst1 = NumRegs++, Cst2 = NumRegs++,
                    Cst3 = NumRegs++;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);

  // After rematerializing %2 and %3 to bb.3, their respective original defining
  // regions are empty. %2's region ends at the end of its parent block, whereas
  // %3's region ends at a terminator MI (S_BRANCH).
  Remater.rematerializeToRegion(/*RootIdx=*/Cst2, /*UseRegion=*/MBB3, DRI);
  Remater.rematerializeToRegion(/*RootIdx=*/Cst3, /*UseRegion=*/MBB3,
                                DRI.clear());
  RegionSizes[MBB1] -= 1;
  RegionSizes[MBB2] -= 1;
  RegionSizes[MBB3] += 2;
  ASSERT_REGION_SIZES(RegionSizes);

  // Move %0 to the empty MBB1 block/region.
  const RegisterIdx RematCst0 =
      Remater.rematerializeToRegion(Cst0, MBB1, DRI.clear());
  Remater.transferRegionUsers(Cst0, RematCst0, MBB3);

  // Move %1 to the empty MBB2 region, right before the S_BRANCH terminator.
  const RegisterIdx RematCst1 = Remater.rematerializeToPos(
      Cst1, MBB2, (*Regions)[MBB2].first, DRI.clear());
  Remater.transferRegionUsers(Cst1, RematCst1, MBB3);

  RegionSizes[MBB0] -= 2;
  RegionSizes[MBB1] += 1;
  RegionSizes[MBB2] += 1;
  ASSERT_REGION_SIZES(RegionSizes);

  Remater.updateLiveIntervals();
  EXPECT_TRUE(getMF().verify());
}

/// Checks that only registers with a single definition are rematerializable,
/// even when registers are made up of multiple sub-registers each with their
/// own definition.
TEST_F(RematerializerTest, SubReg) {
  StringRef MIR = R"(
name: SubReg
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    undef %01.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %01.sub1:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
    undef %2.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
    undef %34.sub0:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
  bb.1:
    %34.sub1:vreg_64_align2 = nofpexcept V_CVT_I32_F64_e32 4, implicit $exec, implicit $mode
    S_NOP 0, implicit %01, implicit %2, implicit %34
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "SubReg"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1;
  SmallVector<unsigned> RegionSizes{4, 2};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers. Only %2 has a single definition;
  // %01 and %34 are defined over multiple sub-register writes.
  unsigned NumRegs = 0;
  const RegisterIdx Cst2 = NumRegs++;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);

  RegisterIdx RematCst2 =
      Remater.rematerializeToRegion(/*RootIdx=*/Cst2, /*UseRegion=*/MBB1, DRI);
  RegionSizes[MBB0] -= 1;
  RegionSizes[MBB1] += 1;
  ASSERT_REGION_SIZES(RegionSizes);
  EXPECT_REMAT(/*RegIdx=*/RematCst2, /*OriginIdx=*/Cst2, /*DefRegionIdx=*/MBB1,
               /*NumUsers=*/1);

  Remater.updateLiveIntervals();
  EXPECT_TRUE(getMF().verify());
}

/// The rematerializer had a bug where re-creating the interval of a
/// non-rematerializable super-register defined over multiple MIs, some of which
/// defining entirely dead subregisters, could cause a crash when changing the
/// order of sub-definitions (for example during scheduling) because the
/// re-created interval could end up with multiple connected components, which
/// is illegal. The solution is to split separate components of the interval in
/// such cases.
TEST_F(RematerializerTest, SplitSubRegDeadDef) {
  StringRef MIR = R"(
name: SplitSubRegDeadDef
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    undef %0.sub0:vreg_64 = IMPLICIT_DEF
    %0.sub1:vreg_64 = IMPLICIT_DEF
    %1:vgpr_32 = V_ADD_U32_e32 %0.sub0, %0.sub0, implicit $exec
  bb.1:
    S_NOP 0, implicit %1
    S_ENDPGM 0
...
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "SplitSubRegDeadDef"));
  LiveIntervals &LIS = MFAM.getResult<LiveIntervalsAnalysis>(*MF);

  // Replicates the scheduler's effect on LIS on an intra-block move of MI.
  auto MoveMIAndAdjustLiveness = [&](MachineInstr &MI) {
    LIS.handleMove(MI);
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
    RegisterOperands RegOpers;
    RegOpers.collect(MI, TRI, MRI, true, /*IgnoreDead=*/false);
    SlotIndex Sub1Slot = LIS.getInstructionIndex(MI).getRegSlot();
    RegOpers.adjustLaneLiveness(LIS, MRI, Sub1Slot, &MI);
  };

  MachineBasicBlock &MBB0 = *MF->getBlockNumbered(0);
  MachineInstr &Sub0Def = *MBB0.begin();
  MachineInstr &Sub1Def = *MBB0.begin()->getNextNode();

  // Flip %0's subdefinition order. After the move, the definitions look like:
  //    undef %0.sub1:vreg_64 = IMPLICIT_DEF
  //    undef %0.sub0:vreg_64 = IMPLICIT_DEF
  MBB0.splice(Sub0Def.getIterator(), &MBB0, Sub1Def.getIterator());
  MoveMIAndAdjustLiveness(Sub1Def);

  // Rematerialize %1 to bb.1. This triggers a live-interval update of %0 when
  // calling Remater.updateLiveIntervals(), during which its interval is split.
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;
  const unsigned MBB1 = 1;
  const RegisterIdx Add = 0;
  Remater.rematerializeToRegion(Add, MBB1, DRI);
  Remater.updateLiveIntervals();

  // If we didn't split %0 before, its definitions would now look like:
  //    dead undef %0.sub1:vreg_64 = IMPLICIT_DEF
  //    undef %0.sub0:vreg_64 = IMPLICIT_DEF
  //
  // Trying to flip back %0's definition order then triggers an
  // error in LIS.handleMove because its live interval is made up of multiple
  // connected components.
  ASSERT_NE(Sub0Def.getOperand(0).getReg(), Sub1Def.getOperand(0).getReg());
  MBB0.splice(MBB0.end(), &MBB0, Sub1Def.getIterator());
  MoveMIAndAdjustLiveness(Sub1Def);

  EXPECT_TRUE(getMF().verify());
}

/// Checks that rollback works as expected when the rollback listener is added
/// mid-rematerializations.
TEST_F(RematerializerTest, Rollback) {
  StringRef MIR = R"(
name: Rollback
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
  bb.1:
    S_NOP 0, implicit %0, implicit %1
  bb.2:
    S_NOP 0, implicit %0, implicit %1
    S_ENDPGM 0
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "Rollback"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1, MBB2 = 2;
  SmallVector<unsigned> RegionSizes{2, 1, 1};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers.
  unsigned NumRegs = 0;
  const RegisterIdx Cst0 = NumRegs++, Cst1 = NumRegs++;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);

  // Rematerialize %0 to MBB1, taking one user from the original register.
  RegisterIdx RematCst0MBB1 = Remater.rematerializeToRegion(Cst0, MBB1, DRI);
  RegionSizes[MBB1] += 1;
  ASSERT_REGION_SIZES(RegionSizes);
  NumRegs += 1;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);

  // The listener only observes rematerializations performed from this point on.
  Rollbacker Rollback;
  Remater.addListener(&Rollback);

  // Rematerialize %0 to MBB2 and %1 to MBB1/MBB2; each rematerialization ends
  // up with a single user and both original registers are deleted.
  RegisterIdx RematCst0MBB2 =
      Remater.rematerializeToRegion(Cst0, MBB2, DRI.clear());
  RegisterIdx RematCst1MBB1 =
      Remater.rematerializeToRegion(Cst1, MBB1, DRI.clear());
  RegisterIdx RematCst1MBB2 =
      Remater.rematerializeToRegion(Cst1, MBB2, DRI.clear());
  RegionSizes[MBB0] -= 2;
  RegionSizes[MBB1] += 1;
  RegionSizes[MBB2] += 2;
  ASSERT_REGION_SIZES(RegionSizes);
  NumRegs += 3;
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);
  EXPECT_NO_USERS(Cst0);
  EXPECT_NO_USERS(Cst1);
  EXPECT_NUM_USERS(RematCst0MBB1, 1);
  EXPECT_NUM_USERS(RematCst0MBB2, 1);
  EXPECT_NUM_USERS(RematCst1MBB1, 1);
  EXPECT_NUM_USERS(RematCst1MBB2, 1);

  // Rollback all changes since the rollbacker was added. The first
  // rematerialization of %0 to MBB1 happened before so it is not rolled back.
  // However %0 is re-created because it was deleted after.
  Rollback.rollback(Remater);
  RegionSizes[MBB0] += 2;
  RegionSizes[MBB1] -= 1;
  RegionSizes[MBB2] -= 2;
  ASSERT_REGION_SIZES(RegionSizes);
  ASSERT_EQ(Remater.getNumRegs(), NumRegs);
  EXPECT_NUM_USERS(Cst0, 1);
  EXPECT_NUM_USERS(Cst1, 2);
  EXPECT_NUM_USERS(RematCst0MBB1, 1);
  EXPECT_NO_USERS(RematCst0MBB2);
  EXPECT_NO_USERS(RematCst1MBB1);
  EXPECT_NO_USERS(RematCst1MBB2);

  EXPECT_TRUE(getMF().verify());
}

/// Checks that rollback re-creates MIs at correct positions when the order of
/// register deletions forces the re-creation logic to iterate through multiple
/// deleted registers' respective insert position to find a valid one.
TEST_F(RematerializerTest, RollbackInvalidInsertPos) {
  StringRef MIR = R"(
name: RollbackInvalidInsertPos
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 0, implicit $exec, implicit $mode
    %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
    %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
    %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
  bb.1:
    S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3
    S_ENDPGM 0
)";
  ASSERT_TRUE(parseMIRAndInit(MIR, "RollbackInvalidInsertPos"));
  Rematerializer &Remater = getRematerializer();
  Rematerializer::DependencyReuseInfo DRI;
  Rollbacker Rollback;
  Remater.addListener(&Rollback);

  // MBB/Region indices.
  const unsigned MBB0 = 0, MBB1 = 1;
  SmallVector<unsigned> RegionSizes{4, 1};
  ASSERT_REGION_SIZES(RegionSizes);

  // Indices of rematerializable registers.
  const RegisterIdx Cst0 = 0, Cst1 = 1, Cst2 = 2, Cst3 = 3;

  // Rematerialize %0 to MBB1, deleting the original register
  Remater.rematerializeToRegion(Cst0, MBB1, DRI);
  RegionSizes[MBB0] -= 1;
  RegionSizes[MBB1] += 1;
  ASSERT_REGION_SIZES(RegionSizes);

  // Rematerialize %1 to MBB1, deleting the original register
  Remater.rematerializeToRegion(Cst1, MBB1, DRI.clear());
  RegionSizes[MBB0] -= 1;
  RegionSizes[MBB1] += 1;
  ASSERT_REGION_SIZES(RegionSizes);

  // Rematerialize %2 to MBB1, deleting the original register
  Remater.rematerializeToRegion(Cst2, MBB1, DRI.clear());
  RegionSizes[MBB0] -= 1;
  RegionSizes[MBB1] += 1;
  ASSERT_REGION_SIZES(RegionSizes);

  // Now rollback and check for correct instruction order in the original
  // defining region. The asserts on region sizes ensure that all original
  // registers were indeed deleted and will be re-created in the original
  // region.
  Rollback.rollback(Remater);
  RegionSizes[MBB0] += 3;
  RegionSizes[MBB1] -= 3;
  ASSERT_REGION_SIZES(RegionSizes);

  MachineInstr &DefCst0 = *Remater.getReg(Cst0).DefMI;
  MachineInstr &DefCst1 = *Remater.getReg(Cst1).DefMI;
  MachineInstr &DefCst2 = *Remater.getReg(Cst2).DefMI;
  MachineInstr &DefCst3 = *Remater.getReg(Cst3).DefMI;
  EXPECT_EQ(std::next(DefCst0.getIterator()), DefCst1.getIterator());
  EXPECT_EQ(std::next(DefCst1.getIterator()), DefCst2.getIterator());
  EXPECT_EQ(std::next(DefCst2.getIterator()), DefCst3.getIterator());

  EXPECT_TRUE(getMF().verify());
}