From bf2b1efce5b99def672ec61b1d449d066d5db7bd Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 29 Apr 2026 13:48:04 -0700 Subject: [PATCH] [clang][deps] Use `ModuleFile` instead of `Module` (#194028) This switches the dependency scanner from handling `Module` objects to dealing with `serialization::ModuleFile`. Module file already contains (almost) all of the information the scanner needs, and has the advantage of not forcing deserialization of `Module` objects from PCM files, which will become important in a later PR. This alone improves clean scans by 1.8% and incremental scans by 2.6%. This effect is likely caused by removing iteration over many `Module` objects, removing deduplication of work via associative containers, and removing the sorting of `Module` objects by name to ensure deterministic order. --- .../DependencyScanningService.h | 2 + .../DependencyScanning/ModuleDepCollector.h | 47 ++--- .../DependencyScanning/ModuleDepCollector.cpp | 183 ++++++------------ clang/tools/clang-scan-deps/ClangScanDeps.cpp | 1 + 4 files changed, 71 insertions(+), 162 deletions(-) diff --git a/clang/include/clang/DependencyScanning/DependencyScanningService.h b/clang/include/clang/DependencyScanning/DependencyScanningService.h index d35604a83985..f379381faea5 100644 --- a/clang/include/clang/DependencyScanning/DependencyScanningService.h +++ b/clang/include/clang/DependencyScanning/DependencyScanningService.h @@ -93,6 +93,8 @@ struct DependencyScanningServiceOptions { ScanningOptimizations OptimizeArgs = ScanningOptimizations::Default; /// Whether to make reported file paths absolute. bool ReportAbsolutePaths = true; + /// Whether to report modules visible from modules that are imported directly. + bool ReportVisibleModules = false; /// Whether the resulting command lines should load explicit PCMs eagerly. bool EagerLoadModules = false; /// Whether to trace VFS accesses during the scan. 
diff --git a/clang/include/clang/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/DependencyScanning/ModuleDepCollector.h index bbb0f5b4d659..e7dd907a0038 100644 --- a/clang/include/clang/DependencyScanning/ModuleDepCollector.h +++ b/clang/include/clang/DependencyScanning/ModuleDepCollector.h @@ -40,10 +40,9 @@ struct PrebuiltModuleDep { std::string PCMFile; std::string ModuleMapFile; - explicit PrebuiltModuleDep(const Module *M) - : ModuleName(M->getTopLevelModuleName()), - PCMFile(M->getASTFileName()->str()), - ModuleMapFile(M->PresumedModuleMapFile) {} + explicit PrebuiltModuleDep(const serialization::ModuleFile *MF) + : ModuleName(MF->ModuleName), PCMFile(MF->FileName.str()), + ModuleMapFile(MF->ModuleMapPath) {} }; /// Attributes loaded from AST files of prebuilt modules collected prior to @@ -247,34 +246,12 @@ private: void handleImport(const Module *Imported); - /// Adds direct modular dependencies that have already been built to the - /// ModuleDeps instance. - void - addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD, - llvm::DenseSet &SeenSubmodules); - void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD, - llvm::DenseSet &SeenSubmodules); - - /// Traverses the previously collected direct modular dependencies to discover - /// transitive modular dependencies and fills the parent \c ModuleDepCollector - /// with both. /// Returns the ID or nothing if the dependency is spurious and is ignored. - std::optional handleTopLevelModule(const Module *M); - void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules); - void addModuleDep(const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules); + std::optional handleTopLevelModule(serialization::ModuleFile *MF); - /// Traverses the affecting modules and updates \c MD with references to the - /// parent \c ModuleDepCollector info. 
- void - addAllAffectingClangModules(const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules); - void addAffectingClangModule(const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules); - - /// Add discovered module dependency for the given module. - void addOneModuleDep(const Module *M, const ModuleID ID, ModuleDeps &MD); + /// Adds direct module dependencies to the ModuleDeps instance. This includes + /// prebuilt module and implicitly-built modules. + void addAllModuleDeps(serialization::ModuleFile &MF, ModuleDeps &MD); }; /// Collects modular and non-modular dependencies of the main file by attaching @@ -321,14 +298,16 @@ private: /// textually included header files. std::vector FileDeps; /// Direct and transitive modular dependencies of the main source file. - llvm::MapVector> ModularDeps; + llvm::MapVector> + ModularDeps; /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without /// a preprocessor. Storage owned by \c ModularDeps. llvm::DenseMap ModuleDepsByID; /// Direct modular dependencies that have already been built. - llvm::MapVector DirectPrebuiltModularDeps; + llvm::MapVector + DirectPrebuiltModularDeps; /// Working set of direct modular dependencies. - llvm::SetVector DirectModularDeps; + llvm::SetVector DirectModularDeps; /// Working set of direct modular dependencies, as they were imported. llvm::SmallPtrSet DirectImports; /// All direct and transitive visible modules. @@ -351,7 +330,7 @@ private: ModuleDepCollectorPP *CollectorPPPtr = nullptr; /// Checks whether the module is known as being prebuilt. - bool isPrebuiltModule(const Module *M); + bool isPrebuiltModule(const serialization::ModuleFile *MF); /// Computes all visible modules resolved from direct imports. 
void addVisibleModules(); diff --git a/clang/lib/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/DependencyScanning/ModuleDepCollector.cpp index 06347fb5ade7..f9bc4cc3098e 100644 --- a/clang/lib/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/DependencyScanning/ModuleDepCollector.cpp @@ -584,12 +584,16 @@ void ModuleDepCollectorPP::handleImport(const Module *Imported) { return; const Module *TopLevelModule = Imported->getTopLevelModule(); + const ModuleFileKey *MFKey = TopLevelModule->getASTFileKey(); + if (!MFKey) + return; + serialization::ModuleFile *MF = + MDC.ScanInstance.getASTReader()->getModuleManager().lookup(*MFKey); - if (MDC.isPrebuiltModule(TopLevelModule)) - MDC.DirectPrebuiltModularDeps.insert( - {TopLevelModule, PrebuiltModuleDep{TopLevelModule}}); + if (MDC.isPrebuiltModule(MF)) + MDC.DirectPrebuiltModularDeps.insert({MF, PrebuiltModuleDep{MF}}); else { - MDC.DirectModularDeps.insert(TopLevelModule); + MDC.DirectModularDeps.insert(MF); MDC.DirectImports.insert(Imported); } } @@ -629,14 +633,19 @@ void ModuleDepCollectorPP::EndOfMainFile() { } for (const Module *M : - MDC.ScanInstance.getPreprocessor().getAffectingClangModules()) - if (!MDC.isPrebuiltModule(M)) - MDC.DirectModularDeps.insert(M); + MDC.ScanInstance.getPreprocessor().getAffectingClangModules()) { + serialization::ModuleFile *MF = + MDC.ScanInstance.getASTReader()->getModuleManager().lookup( + *M->getASTFileKey()); + if (!MDC.isPrebuiltModule(MF)) + MDC.DirectModularDeps.insert(MF); + } - MDC.addVisibleModules(); + if (MDC.Service.getOpts().ReportVisibleModules) + MDC.addVisibleModules(); - for (const Module *M : MDC.DirectModularDeps) - handleTopLevelModule(M); + for (serialization::ModuleFile *MF : MDC.DirectModularDeps) + handleTopLevelModule(MF); MDC.Consumer.handleContextHash( MDC.ScanInstance.getInvocation().computeContextHash()); @@ -649,8 +658,8 @@ void ModuleDepCollectorPP::EndOfMainFile() { for (auto &&I : MDC.ModularDeps) 
MDC.Consumer.handleModuleDependency(*I.second); - for (const Module *M : MDC.DirectModularDeps) { - auto It = MDC.ModularDeps.find(M); + for (serialization::ModuleFile *MF : MDC.DirectModularDeps) { + auto It = MDC.ModularDeps.find(MF); // Only report direct dependencies that were successfully handled. if (It != MDC.ModularDeps.end()) MDC.Consumer.handleDirectModuleDependency(It->second->ID); @@ -677,24 +686,22 @@ static StringRef makeAbsoluteAndCanonicalize(CompilerInstance &CI, } std::optional -ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { - assert(M == M->getTopLevelModule() && "Expected top level module!"); - - // A top-level module might not be actually imported as a module when - // -fmodule-name is used to compile a translation unit that imports this - // module. In that case it can be skipped. The appropriate header - // dependencies will still be reported as expected. - if (!M->getASTFileKey()) - return {}; - +ModuleDepCollectorPP::handleTopLevelModule(serialization::ModuleFile *MF) { // If this module has been handled already, just return its ID. 
- if (auto ModI = MDC.ModularDeps.find(M); ModI != MDC.ModularDeps.end()) + if (auto ModI = MDC.ModularDeps.find(MF); ModI != MDC.ModularDeps.end()) return ModI->second->ID; + Module *M = MDC.ScanInstance.getPreprocessor() + .getHeaderSearchInfo() + .getModuleMap() + .findModule(MF->ModuleName); + assert(M && M == M->getTopLevelModule() && + "ModuleFile without top-level Module"); + auto OwnedMD = std::make_unique(); ModuleDeps &MD = *OwnedMD; - MD.ID.ModuleName = M->getFullModuleName(); + MD.ID.ModuleName = MF->ModuleName; MD.IsSystem = M->IsSystem; // Start off with the assumption that this module is shareable when there @@ -716,10 +723,6 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MD.ClangModuleMapFile = std::string(Path); } - serialization::ModuleFile *MF = - MDC.ScanInstance.getASTReader()->getModuleManager().lookup( - *M->getASTFileKey()); - llvm::SmallString<256> Storage; MD.FileDepsBaseDir = makeAbsoluteAndCanonicalize(MDC.ScanInstance, MF->BaseDirectory, Storage); @@ -736,10 +739,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MDC.addFileDep(MD, IFI.UnresolvedImportedFilename); }); - llvm::DenseSet SeenDeps; - addAllSubmodulePrebuiltDeps(M, MD, SeenDeps); - addAllSubmoduleDeps(M, MD, SeenDeps); - addAllAffectingClangModules(M, MD, SeenDeps); + addAllModuleDeps(*MF, MD); SmallString<0> PathBuf; PathBuf.reserve(256); @@ -810,102 +810,30 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MD.BuildInfo = std::move(CI); - MDC.ModularDeps.insert({M, std::move(OwnedMD)}); + MDC.ModularDeps.insert({MF, std::move(OwnedMD)}); return MD.ID; } -static void forEachSubmoduleSorted(const Module *M, - llvm::function_ref F) { - // Submodule order depends on order of header includes for inferred submodules - // we don't care about the exact order, so sort so that it's consistent across - // TUs to improve sharing. 
- SmallVector Submodules(M->submodules()); - llvm::stable_sort(Submodules, [](const Module *A, const Module *B) { - return A->Name < B->Name; - }); - for (const Module *SubM : Submodules) - F(SubM); -} - -void ModuleDepCollectorPP::addAllSubmodulePrebuiltDeps( - const Module *M, ModuleDeps &MD, - llvm::DenseSet &SeenSubmodules) { - addModulePrebuiltDeps(M, MD, SeenSubmodules); - - forEachSubmoduleSorted(M, [&](const Module *SubM) { - addAllSubmodulePrebuiltDeps(SubM, MD, SeenSubmodules); - }); -} - -void ModuleDepCollectorPP::addModulePrebuiltDeps( - const Module *M, ModuleDeps &MD, - llvm::DenseSet &SeenSubmodules) { - for (const Module *Import : M->Imports) - if (Import->getTopLevelModule() != M->getTopLevelModule()) - if (MDC.isPrebuiltModule(Import->getTopLevelModule())) - if (SeenSubmodules.insert(Import->getTopLevelModule()).second) { - MD.PrebuiltModuleDeps.emplace_back(Import->getTopLevelModule()); - if (MD.IsInStableDirectories) { - auto PrebuiltModulePropIt = MDC.PrebuiltModulesASTMap.find( - MD.PrebuiltModuleDeps.back().PCMFile); - MD.IsInStableDirectories = - (PrebuiltModulePropIt != MDC.PrebuiltModulesASTMap.end()) && - PrebuiltModulePropIt->second.isInStableDir(); - } - } -} - -void ModuleDepCollectorPP::addAllSubmoduleDeps( - const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules) { - addModuleDep(M, MD, AddedModules); - - forEachSubmoduleSorted(M, [&](const Module *SubM) { - addAllSubmoduleDeps(SubM, MD, AddedModules); - }); -} - -void ModuleDepCollectorPP::addOneModuleDep(const Module *M, const ModuleID ID, - ModuleDeps &MD) { - MD.ClangModuleDeps.push_back(std::move(ID)); - if (MD.IsInStableDirectories) - MD.IsInStableDirectories = MDC.ModularDeps[M]->IsInStableDirectories; -} - -void ModuleDepCollectorPP::addModuleDep( - const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules) { - for (const Module *Import : M->Imports) { - if (Import->getTopLevelModule() != M->getTopLevelModule() && - !MDC.isPrebuiltModule(Import)) { - if (auto 
ImportID = handleTopLevelModule(Import->getTopLevelModule())) - if (AddedModules.insert(Import->getTopLevelModule()).second) - addOneModuleDep(Import->getTopLevelModule(), *ImportID, MD); - } - } -} - -void ModuleDepCollectorPP::addAllAffectingClangModules( - const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules) { - addAffectingClangModule(M, MD, AddedModules); - - for (const Module *SubM : M->submodules()) - addAllAffectingClangModules(SubM, MD, AddedModules); -} - -void ModuleDepCollectorPP::addAffectingClangModule( - const Module *M, ModuleDeps &MD, - llvm::DenseSet &AddedModules) { - for (const Module *Affecting : M->AffectingClangModules) { - assert(Affecting == Affecting->getTopLevelModule() && - "Not quite import not top-level module"); - if (Affecting != M->getTopLevelModule() && - !MDC.isPrebuiltModule(Affecting)) { - if (auto ImportID = handleTopLevelModule(Affecting)) - if (AddedModules.insert(Affecting).second) - addOneModuleDep(Affecting, *ImportID, MD); +void ModuleDepCollectorPP::addAllModuleDeps(serialization::ModuleFile &MF, + ModuleDeps &MD) { + llvm::DenseSet Seen; + for (serialization::ModuleFile *Import : MF.Imports) { + if (MDC.isPrebuiltModule(Import)) { + MD.PrebuiltModuleDeps.emplace_back(Import); + if (MD.IsInStableDirectories) { + auto It = MDC.PrebuiltModulesASTMap.find( + MD.PrebuiltModuleDeps.back().PCMFile); + MD.IsInStableDirectories = + It != MDC.PrebuiltModulesASTMap.end() && It->second.isInStableDir(); + } + } else { + if (auto ID = handleTopLevelModule(Import)) { + MD.ClangModuleDeps.push_back(std::move(*ID)); + if (MD.IsInStableDirectories) + MD.IsInStableDirectories = + MDC.ModularDeps[Import]->IsInStableDirectories; + } } } } @@ -932,15 +860,14 @@ void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) { void ModuleDepCollector::attachToASTReader(ASTReader &R) {} -bool ModuleDepCollector::isPrebuiltModule(const Module *M) { - std::string Name(M->getTopLevelModuleName()); +bool 
ModuleDepCollector::isPrebuiltModule(const serialization::ModuleFile *MF) { const auto &PrebuiltModuleFiles = ScanInstance.getHeaderSearchOpts().PrebuiltModuleFiles; - auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(Name); + auto PrebuiltModuleFileIt = PrebuiltModuleFiles.find(MF->ModuleName); if (PrebuiltModuleFileIt == PrebuiltModuleFiles.end()) return false; assert("Prebuilt module came from the expected AST file" && - PrebuiltModuleFileIt->second == M->getASTFileName()->str()); + PrebuiltModuleFileIt->second == MF->FileName.str()); return true; } diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index ccc8ed5d7942..1d80ac519bb2 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -1160,6 +1160,7 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { // Within P1689 format, we don't want all the paths to be absolute path // since it may violate the traditional make style dependencies info. Opts.ReportAbsolutePaths = Format != ScanningOutputFormat::P1689; + Opts.ReportVisibleModules = EmitVisibleModules; Opts.EagerLoadModules = EagerLoadModules; Opts.TraceVFS = Verbose; Opts.AsyncScanModules = AsyncScanModules;