[clang][deps] Use ModuleFile instead of Module (#194028)

This switches the dependency scanner from handling `Module` objects to
dealing with `serialization::ModuleFile`. A `ModuleFile` already contains
(almost) all of the information the scanner needs, and has the advantage
of not forcing deserialization of `Module` objects from PCM files, which
will become important in a later PR.

This alone improves clean scans by 1.8% and incremental scans by 2.6%.
This effect is likely caused by no longer iterating over many `Module`
objects, no longer deduplicating work via associative containers, and no
longer sorting `Module` objects by name to ensure deterministic order.
This commit is contained in:
Jan Svoboda
2026-04-29 13:48:04 -07:00
committed by GitHub
parent 015e6ca241
commit bf2b1efce5
4 changed files with 71 additions and 162 deletions

View File

@@ -93,6 +93,8 @@ struct DependencyScanningServiceOptions {
ScanningOptimizations OptimizeArgs = ScanningOptimizations::Default; ScanningOptimizations OptimizeArgs = ScanningOptimizations::Default;
/// Whether to make reported file paths absolute. /// Whether to make reported file paths absolute.
bool ReportAbsolutePaths = true; bool ReportAbsolutePaths = true;
/// Whether to report modules visible from modules that are imported directly.
bool ReportVisibleModules = false;
/// Whether the resulting command lines should load explicit PCMs eagerly. /// Whether the resulting command lines should load explicit PCMs eagerly.
bool EagerLoadModules = false; bool EagerLoadModules = false;
/// Whether to trace VFS accesses during the scan. /// Whether to trace VFS accesses during the scan.

View File

@@ -40,10 +40,9 @@ struct PrebuiltModuleDep {
std::string PCMFile; std::string PCMFile;
std::string ModuleMapFile; std::string ModuleMapFile;
explicit PrebuiltModuleDep(const Module *M) explicit PrebuiltModuleDep(const serialization::ModuleFile *MF)
: ModuleName(M->getTopLevelModuleName()), : ModuleName(MF->ModuleName), PCMFile(MF->FileName.str()),
PCMFile(M->getASTFileName()->str()), ModuleMapFile(MF->ModuleMapPath) {}
ModuleMapFile(M->PresumedModuleMapFile) {}
}; };
/// Attributes loaded from AST files of prebuilt modules collected prior to /// Attributes loaded from AST files of prebuilt modules collected prior to
@@ -247,34 +246,12 @@ private:
void handleImport(const Module *Imported); void handleImport(const Module *Imported);
/// Adds direct modular dependencies that have already been built to the
/// ModuleDeps instance.
void
addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &SeenSubmodules);
void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &SeenSubmodules);
/// Traverses the previously collected direct modular dependencies to discover
/// transitive modular dependencies and fills the parent \c ModuleDepCollector
/// with both.
/// Returns the ID or nothing if the dependency is spurious and is ignored. /// Returns the ID or nothing if the dependency is spurious and is ignored.
std::optional<ModuleID> handleTopLevelModule(const Module *M); std::optional<ModuleID> handleTopLevelModule(serialization::ModuleFile *MF);
void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules);
void addModuleDep(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules);
/// Traverses the affecting modules and updates \c MD with references to the /// Adds direct module dependencies to the ModuleDeps instance. This includes
/// parent \c ModuleDepCollector info. /// prebuilt modules and implicitly-built modules.
void void addAllModuleDeps(serialization::ModuleFile &MF, ModuleDeps &MD);
addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules);
void addAffectingClangModule(const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules);
/// Add discovered module dependency for the given module.
void addOneModuleDep(const Module *M, const ModuleID ID, ModuleDeps &MD);
}; };
/// Collects modular and non-modular dependencies of the main file by attaching /// Collects modular and non-modular dependencies of the main file by attaching
@@ -321,14 +298,16 @@ private:
/// textually included header files. /// textually included header files.
std::vector<std::string> FileDeps; std::vector<std::string> FileDeps;
/// Direct and transitive modular dependencies of the main source file. /// Direct and transitive modular dependencies of the main source file.
llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps; llvm::MapVector<serialization::ModuleFile *, std::unique_ptr<ModuleDeps>>
ModularDeps;
/// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
/// a preprocessor. Storage owned by \c ModularDeps. /// a preprocessor. Storage owned by \c ModularDeps.
llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID; llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
/// Direct modular dependencies that have already been built. /// Direct modular dependencies that have already been built.
llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps; llvm::MapVector<serialization::ModuleFile *, PrebuiltModuleDep>
DirectPrebuiltModularDeps;
/// Working set of direct modular dependencies. /// Working set of direct modular dependencies.
llvm::SetVector<const Module *> DirectModularDeps; llvm::SetVector<serialization::ModuleFile *> DirectModularDeps;
/// Working set of direct modular dependencies, as they were imported. /// Working set of direct modular dependencies, as they were imported.
llvm::SmallPtrSet<const Module *, 32> DirectImports; llvm::SmallPtrSet<const Module *, 32> DirectImports;
/// All direct and transitive visible modules. /// All direct and transitive visible modules.
@@ -351,7 +330,7 @@ private:
ModuleDepCollectorPP *CollectorPPPtr = nullptr; ModuleDepCollectorPP *CollectorPPPtr = nullptr;
/// Checks whether the module is known as being prebuilt. /// Checks whether the module is known as being prebuilt.
bool isPrebuiltModule(const Module *M); bool isPrebuiltModule(const serialization::ModuleFile *MF);
/// Computes all visible modules resolved from direct imports. /// Computes all visible modules resolved from direct imports.
void addVisibleModules(); void addVisibleModules();

View File

@@ -584,12 +584,16 @@ void ModuleDepCollectorPP::handleImport(const Module *Imported) {
return; return;
const Module *TopLevelModule = Imported->getTopLevelModule(); const Module *TopLevelModule = Imported->getTopLevelModule();
const ModuleFileKey *MFKey = TopLevelModule->getASTFileKey();
if (!MFKey)
return;
serialization::ModuleFile *MF =
MDC.ScanInstance.getASTReader()->getModuleManager().lookup(*MFKey);
if (MDC.isPrebuiltModule(TopLevelModule)) if (MDC.isPrebuiltModule(MF))
MDC.DirectPrebuiltModularDeps.insert( MDC.DirectPrebuiltModularDeps.insert({MF, PrebuiltModuleDep{MF}});
{TopLevelModule, PrebuiltModuleDep{TopLevelModule}});
else { else {
MDC.DirectModularDeps.insert(TopLevelModule); MDC.DirectModularDeps.insert(MF);
MDC.DirectImports.insert(Imported); MDC.DirectImports.insert(Imported);
} }
} }
@@ -629,14 +633,19 @@ void ModuleDepCollectorPP::EndOfMainFile() {
} }
for (const Module *M : for (const Module *M :
MDC.ScanInstance.getPreprocessor().getAffectingClangModules()) MDC.ScanInstance.getPreprocessor().getAffectingClangModules()) {
if (!MDC.isPrebuiltModule(M)) serialization::ModuleFile *MF =
MDC.DirectModularDeps.insert(M); MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
*M->getASTFileKey());
if (!MDC.isPrebuiltModule(MF))
MDC.DirectModularDeps.insert(MF);
}
MDC.addVisibleModules(); if (MDC.Service.getOpts().ReportVisibleModules)
MDC.addVisibleModules();
for (const Module *M : MDC.DirectModularDeps) for (serialization::ModuleFile *MF : MDC.DirectModularDeps)
handleTopLevelModule(M); handleTopLevelModule(MF);
MDC.Consumer.handleContextHash( MDC.Consumer.handleContextHash(
MDC.ScanInstance.getInvocation().computeContextHash()); MDC.ScanInstance.getInvocation().computeContextHash());
@@ -649,8 +658,8 @@ void ModuleDepCollectorPP::EndOfMainFile() {
for (auto &&I : MDC.ModularDeps) for (auto &&I : MDC.ModularDeps)
MDC.Consumer.handleModuleDependency(*I.second); MDC.Consumer.handleModuleDependency(*I.second);
for (const Module *M : MDC.DirectModularDeps) { for (serialization::ModuleFile *MF : MDC.DirectModularDeps) {
auto It = MDC.ModularDeps.find(M); auto It = MDC.ModularDeps.find(MF);
// Only report direct dependencies that were successfully handled. // Only report direct dependencies that were successfully handled.
if (It != MDC.ModularDeps.end()) if (It != MDC.ModularDeps.end())
MDC.Consumer.handleDirectModuleDependency(It->second->ID); MDC.Consumer.handleDirectModuleDependency(It->second->ID);
@@ -677,24 +686,22 @@ static StringRef makeAbsoluteAndCanonicalize(CompilerInstance &CI,
} }
std::optional<ModuleID> std::optional<ModuleID>
ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { ModuleDepCollectorPP::handleTopLevelModule(serialization::ModuleFile *MF) {
assert(M == M->getTopLevelModule() && "Expected top level module!");
// A top-level module might not be actually imported as a module when
// -fmodule-name is used to compile a translation unit that imports this
// module. In that case it can be skipped. The appropriate header
// dependencies will still be reported as expected.
if (!M->getASTFileKey())
return {};
// If this module has been handled already, just return its ID. // If this module has been handled already, just return its ID.
if (auto ModI = MDC.ModularDeps.find(M); ModI != MDC.ModularDeps.end()) if (auto ModI = MDC.ModularDeps.find(MF); ModI != MDC.ModularDeps.end())
return ModI->second->ID; return ModI->second->ID;
Module *M = MDC.ScanInstance.getPreprocessor()
.getHeaderSearchInfo()
.getModuleMap()
.findModule(MF->ModuleName);
assert(M && M == M->getTopLevelModule() &&
"ModuleFile without top-level Module");
auto OwnedMD = std::make_unique<ModuleDeps>(); auto OwnedMD = std::make_unique<ModuleDeps>();
ModuleDeps &MD = *OwnedMD; ModuleDeps &MD = *OwnedMD;
MD.ID.ModuleName = M->getFullModuleName(); MD.ID.ModuleName = MF->ModuleName;
MD.IsSystem = M->IsSystem; MD.IsSystem = M->IsSystem;
// Start off with the assumption that this module is shareable when there // Start off with the assumption that this module is shareable when there
@@ -716,10 +723,6 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MD.ClangModuleMapFile = std::string(Path); MD.ClangModuleMapFile = std::string(Path);
} }
serialization::ModuleFile *MF =
MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
*M->getASTFileKey());
llvm::SmallString<256> Storage; llvm::SmallString<256> Storage;
MD.FileDepsBaseDir = MD.FileDepsBaseDir =
makeAbsoluteAndCanonicalize(MDC.ScanInstance, MF->BaseDirectory, Storage); makeAbsoluteAndCanonicalize(MDC.ScanInstance, MF->BaseDirectory, Storage);
@@ -736,10 +739,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MDC.addFileDep(MD, IFI.UnresolvedImportedFilename); MDC.addFileDep(MD, IFI.UnresolvedImportedFilename);
}); });
llvm::DenseSet<const Module *> SeenDeps; addAllModuleDeps(*MF, MD);
addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
addAllSubmoduleDeps(M, MD, SeenDeps);
addAllAffectingClangModules(M, MD, SeenDeps);
SmallString<0> PathBuf; SmallString<0> PathBuf;
PathBuf.reserve(256); PathBuf.reserve(256);
@@ -810,102 +810,30 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
MD.BuildInfo = std::move(CI); MD.BuildInfo = std::move(CI);
MDC.ModularDeps.insert({M, std::move(OwnedMD)}); MDC.ModularDeps.insert({MF, std::move(OwnedMD)});
return MD.ID; return MD.ID;
} }
static void forEachSubmoduleSorted(const Module *M, void ModuleDepCollectorPP::addAllModuleDeps(serialization::ModuleFile &MF,
llvm::function_ref<void(const Module *)> F) { ModuleDeps &MD) {
// Submodule order depends on order of header includes for inferred submodules llvm::DenseSet<const Module *> Seen;
// we don't care about the exact order, so sort so that it's consistent across for (serialization::ModuleFile *Import : MF.Imports) {
// TUs to improve sharing. if (MDC.isPrebuiltModule(Import)) {
SmallVector<const Module *> Submodules(M->submodules()); MD.PrebuiltModuleDeps.emplace_back(Import);
llvm::stable_sort(Submodules, [](const Module *A, const Module *B) { if (MD.IsInStableDirectories) {
return A->Name < B->Name; auto It = MDC.PrebuiltModulesASTMap.find(
}); MD.PrebuiltModuleDeps.back().PCMFile);
for (const Module *SubM : Submodules) MD.IsInStableDirectories =
F(SubM); It != MDC.PrebuiltModulesASTMap.end() && It->second.isInStableDir();
} }
} else {
void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( if (auto ID = handleTopLevelModule(Import)) {
const Module *M, ModuleDeps &MD, MD.ClangModuleDeps.push_back(std::move(*ID));
llvm::DenseSet<const Module *> &SeenSubmodules) { if (MD.IsInStableDirectories)
addModulePrebuiltDeps(M, MD, SeenSubmodules); MD.IsInStableDirectories =
MDC.ModularDeps[Import]->IsInStableDirectories;
forEachSubmoduleSorted(M, [&](const Module *SubM) { }
addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules);
});
}
void ModuleDepCollectorPP::addModulePrebuiltDeps(
const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &SeenSubmodules) {
for (const Module *Import : M->Imports)
if (Import->getTopLevelModule() != M->getTopLevelModule())
if (MDC.isPrebuiltModule(Import->getTopLevelModule()))
if (SeenSubmodules.insert(Import->getTopLevelModule()).second) {
MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule());
if (MD.IsInStableDirectories) {
auto PrebuiltModulePropIt = MDC.PrebuiltModulesASTMap.find(
MD.PrebuiltModuleDeps.back().PCMFile);
MD.IsInStableDirectories =
(PrebuiltModulePropIt != MDC.PrebuiltModulesASTMap.end()) &&
PrebuiltModulePropIt->second.isInStableDir();
}
}
}
void ModuleDepCollectorPP::addAllSubmoduleDeps(
const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules) {
addModuleDep(M, MD, AddedModules);
forEachSubmoduleSorted(M, [&](const Module *SubM) {
addAllSubmoduleDeps(SubM, MD, AddedModules);
});
}
void ModuleDepCollectorPP::addOneModuleDep(const Module *M, const ModuleID ID,
ModuleDeps &MD) {
MD.ClangModuleDeps.push_back(std::move(ID));
if (MD.IsInStableDirectories)
MD.IsInStableDirectories = MDC.ModularDeps[M]->IsInStableDirectories;
}
void ModuleDepCollectorPP::addModuleDep(
const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules) {
for (const Module *Import : M->Imports) {
if (Import->getTopLevelModule() != M->getTopLevelModule() &&
!MDC.isPrebuiltModule(Import)) {
if (auto ImportID = handleTopLevelModule(Import->getTopLevelModule()))
if (AddedModules.insert(Import->getTopLevelModule()).second)
addOneModuleDep(Import->getTopLevelModule(), *ImportID, MD);
}
}
}
void ModuleDepCollectorPP::addAllAffectingClangModules(
const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules) {
addAffectingClangModule(M, MD, AddedModules);
for (const Module *SubM : M->submodules())
addAllAffectingClangModules(SubM, MD, AddedModules);
}
void ModuleDepCollectorPP::addAffectingClangModule(
const Module *M, ModuleDeps &MD,
llvm::DenseSet<const Module *> &AddedModules) {
for (const Module *Affecting : M->AffectingClangModules) {
assert(Affecting == Affecting->getTopLevelModule() &&
"Not quite import not top-level module");
if (Affecting != M->getTopLevelModule() &&
!MDC.isPrebuiltModule(Affecting)) {
if (auto ImportID = handleTopLevelModule(Affecting))
if (AddedModules.insert(Affecting).second)
addOneModuleDep(Affecting, *ImportID, MD);
} }
} }
} }
@@ -932,15 +860,14 @@ void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
void ModuleDepCollector::attachToASTReader(ASTReader &R) {} void ModuleDepCollector::attachToASTReader(ASTReader &R) {}
bool ModuleDepCollector::isPrebuiltModule(const Module *M) { bool ModuleDepCollector::isPrebuiltModule(const serialization::ModuleFile *MF) {
std::string Name(M->getTopLevelModuleName());
const auto &PrebuiltModuleFiles = const auto &PrebuiltModuleFiles =
ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles;
auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(MF->ModuleName);
if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end())
return false; return false;
assert("Prebuilt module came from the expected AST file" && assert("Prebuilt module came from the expected AST file" &&
PrebuiltModuleFileIt->second == M->getASTFileName()->str()); PrebuiltModuleFileIt->second == MF->FileName.str());
return true; return true;
} }

View File

@@ -1160,6 +1160,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
// Within P1689 format, we don't want all the paths to be absolute path // Within P1689 format, we don't want all the paths to be absolute path
// since it may violate the traditional make style dependencies info. // since it may violate the traditional make style dependencies info.
Opts.ReportAbsolutePaths = Format != ScanningOutputFormat::P1689; Opts.ReportAbsolutePaths = Format != ScanningOutputFormat::P1689;
Opts.ReportVisibleModules = EmitVisibleModules;
Opts.EagerLoadModules = EagerLoadModules; Opts.EagerLoadModules = EagerLoadModules;
Opts.TraceVFS = Verbose; Opts.TraceVFS = Verbose;
Opts.AsyncScanModules = AsyncScanModules; Opts.AsyncScanModules = AsyncScanModules;