Files
llvm-project/clang/lib/Serialization/ModuleManager.cpp
Jan Svoboda febd3de07d [clang] Get the directory identity from ModuleCache instead of FileManager (#193070)
Using `FileManager`'s caching and deduplication functionality for
assigning identity to the module cache is handy, but it relies on two
assumptions:
* the rest of the compiler consistently calls
`FileManager::getOptionalDirectoryRef()` with `/*CacheFailure=*/false`
for the module cache path,
* the VFS is not caching failed stats for the module cache path.

This PR implements this functionality in the `ModuleCache` interface,
which is conceptually the right place for it. This PR enables us to land
the VFS simplifications in
https://github.com/llvm/llvm-project/pull/190843.
2026-04-21 17:39:59 +00:00

527 lines
18 KiB
C++

//===- ModuleManager.cpp - Module Manager ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the ModuleManager class, which manages a set of loaded
// modules for the ASTReader.
//
//===----------------------------------------------------------------------===//
#include "clang/Serialization/ModuleManager.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LLVM.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "clang/Serialization/ModuleCache.h"
#include "clang/Serialization/ModuleFile.h"
#include "clang/Serialization/PCHContainerOperations.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <cassert>
#include <memory>
#include <string>
#include <system_error>
using namespace clang;
using namespace serialization;
std::optional<ModuleFileKey>
ModuleManager::makeKey(const ModuleFileName &Name) const {
if (unsigned SuffixLen = Name.getImplicitModuleSuffixLength()) {
StringRef ModuleCachePath = StringRef(Name).drop_back(SuffixLen);
StringRef ImplicitModuleSuffix = StringRef(Name).take_back(SuffixLen);
if (auto *ModuleCacheDir = ModCache.getDirectoryPtr(ModuleCachePath))
return ModuleFileKey(ModuleCacheDir, ImplicitModuleSuffix);
} else {
if (auto ModuleFile = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/true,
/*CacheFailure=*/false,
/*IsText=*/false))
return ModuleFileKey(*ModuleFile);
}
return std::nullopt;
}
/// Find the loaded module file that backs the module called \p Name.
///
/// The module is located through the header search's module map; its recorded
/// AST file name is then handed to lookupByFileName().
///
/// \returns the module file, or null when the module map has no such module,
/// the module has no AST file name, or lookupByFileName() finds nothing.
ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
  const Module *Mod = HeaderSearchInfo.getModuleMap().findModule(Name);
  if (!Mod)
    return nullptr;

  const ModuleFileName *ASTFile = Mod->getASTFileName();
  if (!ASTFile)
    return nullptr;

  return lookupByFileName(*ASTFile);
}
/// Find the loaded module file named \p Name.
///
/// \returns the module file, or null when no key can be formed for \p Name
/// (see makeKey()) or no module is registered under that key.
ModuleFile *ModuleManager::lookupByFileName(ModuleFileName Name) const {
  if (std::optional<ModuleFileKey> Key = makeKey(Name))
    return lookup(*Key);
  return nullptr;
}
/// Find the loaded module file registered under \p Key.
///
/// Callers in this file test the result for null to detect a module that has
/// not been loaded.
ModuleFile *ModuleManager::lookup(ModuleFileKey Key) const {
  ModuleFile *Loaded = Modules.lookup(Key);
  return Loaded;
}
/// Take ownership of the in-memory buffer registered for \p Name, if any.
///
/// \param Name file name to resolve through the FileManager (without opening
/// the file and without caching a failed lookup).
/// \param [out] Size set to the size of the resolved file entry.
/// \param [out] ModTime set to the modification time of the resolved entry.
///
/// \p Size and \p ModTime are written whenever the name resolves, even when no
/// buffer is stored for it; they are left untouched otherwise.
///
/// \returns the buffer stored for the resolved entry (moved out of the table,
/// so the stored slot is left empty), or null when the name does not resolve
/// or the slot holds no buffer.
std::unique_ptr<llvm::MemoryBuffer>
ModuleManager::lookupBuffer(StringRef Name, off_t &Size, time_t &ModTime) {
  auto File = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false,
                                         /*CacheFailure=*/false,
                                         /*IsText=*/false);
  if (File) {
    Size = File->getSize();
    ModTime = File->getModificationTime();

    // Hand the buffer over to the caller; the table keeps only the slot.
    auto &Slot = InMemoryBuffers[*File];
    return std::move(Slot);
  }
  return nullptr;
}
/// Compare a module file's actual size and modification time against the
/// values its importer expected.
///
/// An expected value of zero disables the corresponding comparison. Each
/// mismatch is appended to \p Result.Changes (size first, then modification
/// time) as {kind, expected, actual}.
///
/// \returns true if at least one enabled comparison found a mismatch.
bool ModuleManager::isModuleFileOutOfDate(off_t Size, time_t ModTime,
                                          off_t ExpectedSize,
                                          time_t ExpectedModTime,
                                          AddModuleResult &Result) {
  const bool SizeChanged = ExpectedSize && ExpectedSize != Size;
  if (SizeChanged)
    Result.Changes.push_back({Change::Size, ExpectedSize, Size});

  const bool ModTimeChanged = ExpectedModTime && ExpectedModTime != ModTime;
  if (ModTimeChanged)
    Result.Changes.push_back({Change::ModTime, ExpectedModTime, ModTime});

  return SizeChanged || ModTimeChanged;
}
/// Verify a module file's signature against the one its importer expected.
///
/// An empty \p ExpectedSignature disables the check. On failure,
/// \p Result.SignatureError describes the problem: an empty \p Signature is
/// reported as unreadable, any other difference as a mismatch.
///
/// \returns true if the check failed, false if it passed or was disabled.
bool ModuleManager::checkSignature(ASTFileSignature Signature,
                                   ASTFileSignature ExpectedSignature,
                                   AddModuleResult &Result) {
  // Nothing to verify, or the signatures agree.
  if (!ExpectedSignature)
    return false;
  if (Signature == ExpectedSignature)
    return false;

  const char *Reason =
      Signature ? "signature mismatch" : "could not read module signature";
  Result.SignatureError = Reason;
  return true;
}
/// Record how the module file \p MF was reached.
///
/// With an importer, an edge is added in both directions between \p MF and
/// \p ImportedBy. Without one, \p MF is flagged as directly imported; the
/// import location is stored only the first time that happens.
static void updateModuleImports(ModuleFile &MF, ModuleFile *ImportedBy,
                                SourceLocation ImportLoc) {
  if (!ImportedBy) {
    // Keep the location of the first direct import.
    if (!MF.DirectlyImported)
      MF.ImportLoc = ImportLoc;
    MF.DirectlyImported = true;
    return;
  }

  MF.ImportedBy.insert(ImportedBy);
  ImportedBy->Imports.insert(&MF);
}
/// Load the module file \p FileName, or return the already-loaded instance.
///
/// \param FileName name of the module file.
/// \param Type kind of module file being loaded.
/// \param ImportLoc location recorded as the module's import location.
/// \param ImportedBy the module file importing this one, or null when the
/// module is imported directly.
/// \param Generation generation number handed to the new ModuleFile.
/// \param ExpectedSize expected file size; zero disables the size check.
/// \param ExpectedModTime expected modification time; zero disables the
/// check. It is ignored when neither this module nor its importer (if any) is
/// an implicit module.
/// \param ExpectedSignature expected signature; empty disables the check.
/// \param ReadSignature callback that reads the signature from module data;
/// only invoked when \p ExpectedSignature is non-empty.
///
/// \returns a result whose kind is Missing (no key or no readable buffer),
/// AlreadyLoaded, NewlyLoaded, or out of date (size/mtime/signature mismatch,
/// or an earlier failed attempt recorded by the in-memory module cache).
AddModuleResult ModuleManager::addModule(
    ModuleFileName FileName, ModuleKind Type, SourceLocation ImportLoc,
    ModuleFile *ImportedBy, unsigned Generation, off_t ExpectedSize,
    time_t ExpectedModTime, ASTFileSignature ExpectedSignature,
    ASTFileSignatureReader ReadSignature) {
  auto IsExplicitOrPrebuilt = [](ModuleKind Kind) {
    return Kind == MK_ExplicitModule || Kind == MK_PrebuiltModule;
  };

  AddModuleResult Outcome;

  uint64_t ValidationTimestamp = 0;
  if (Type == MK_ImplicitModule)
    ValidationTimestamp = ModCache.getModuleTimestamp(FileName);

  // If neither this file nor the importer are in the module cache, this file
  // might have a different mtime due to being moved across filesystems in
  // a distributed build. The size must still match, though. (As must the
  // contents, but we can't check that.)
  const bool IgnoreModTime =
      IsExplicitOrPrebuilt(Type) &&
      (!ImportedBy || IsExplicitOrPrebuilt(ImportedBy->Kind));
  if (IgnoreModTime)
    ExpectedModTime = 0;

  std::optional<ModuleFileKey> Key = makeKey(FileName);
  if (!Key) {
    Outcome.K = AddModuleResult::Missing;
    return Outcome;
  }

  // Reuse a module we loaded earlier, provided it still matches what the
  // importer expects.
  // Note: `isModuleFileOutOfDate` and `checkSignature` are mutually exclusive
  // in practice. If a signature is stored, it means size/mtime values have been
  // zeroed out. If size/mtime are non-NULL, the signature is empty.
  if (ModuleFile *Existing = lookup(*Key)) {
    // The signature is only consulted when size and mtime are acceptable.
    if (isModuleFileOutOfDate(Existing->Size, Existing->ModTime, ExpectedSize,
                              ExpectedModTime, Outcome) ||
        checkSignature(Existing->Signature, ExpectedSignature, Outcome)) {
      Outcome.setOutOfDate(Existing->InputFilesValidationStatus);
      return Outcome;
    }

    updateModuleImports(*Existing, ImportedBy, ImportLoc);
    Outcome.Module = Existing;
    Outcome.K = AddModuleResult::AlreadyLoaded;
    return Outcome;
  }

  // Obtain the contents of the module. Size and ModTime start out as the
  // expected values and are updated by whichever source provides the buffer.
  off_t Size = ExpectedSize;
  time_t ModTime = ExpectedModTime;
  llvm::MemoryBuffer *Contents = nullptr;
  std::unique_ptr<llvm::MemoryBuffer> FreshlyRead = nullptr;
  if (std::unique_ptr<llvm::MemoryBuffer> Provided =
          lookupBuffer(FileName, Size, ModTime)) {
    // A buffer was registered for this file ahead of time.
    Contents = &getModuleCache().getInMemoryModuleCache().addBuiltPCM(
        FileName, std::move(Provided), Size, ModTime);
  } else if (llvm::MemoryBuffer *Cached =
                 getModuleCache().getInMemoryModuleCache().lookupPCM(
                     FileName, Size, ModTime)) {
    Contents = Cached;
  } else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM(
                 FileName)) {
    // Report that the module is out of date, since we tried (and failed) to
    // import it earlier. No ModuleFile exists yet, so derive the validation
    // status from the module kind being loaded.
    Outcome.setOutOfDate(IsExplicitOrPrebuilt(Type)
                             ? InputFilesValidation::Disabled
                             : InputFilesValidation::NotStarted);
    return Outcome;
  } else {
    auto ReadContents =
        [&]() -> Expected<std::unique_ptr<llvm::MemoryBuffer>> {
      // Implicit modules live in the module cache.
      if (FileName.getImplicitModuleSuffixLength())
        return ModCache.read(FileName, Size, ModTime);

      // Explicit modules are treated as any other compiler input file, load
      // them via FileManager.
      Expected<FileEntryRef> Entry =
          FileName == StringRef("-")
              ? FileMgr.getSTDIN()
              : FileMgr.getFileRef(FileName, /*OpenFile=*/true,
                                   /*CacheFailure=*/false,
                                   /*IsText=*/false);
      if (!Entry)
        return Entry.takeError();

      Size = Entry->getSize();
      ModTime = Entry->getModificationTime();

      // RequiresNullTerminator is false because module files don't need it,
      // and this allows the file to still be mmapped.
      return llvm::errorOrToExpected(
          FileMgr.getBufferForFile(*Entry, /*IsVolatile=*/false,
                                   /*RequiresNullTerminator=*/false,
                                   /*MaybeLimit=*/std::nullopt,
                                   /*IsText=*/false));
    };

    auto Read = ReadContents();
    if (!Read) {
      Outcome.BufferError = llvm::toString(Read.takeError());
      Outcome.K = AddModuleResult::Missing;
      return Outcome;
    }

    FreshlyRead = std::move(*Read);
    Contents = FreshlyRead.get();
  }

  // Allocate bookkeeping for a module file not yet loaded into this reader.
  auto NewModule = std::make_unique<ModuleFile>(Type, *Key, Generation);
  NewModule->Index = Chain.size();
  NewModule->FileName = FileName;
  NewModule->ImportLoc = ImportLoc;
  NewModule->InputFilesValidationTimestamp = ValidationTimestamp;
  NewModule->Size = Size;
  NewModule->ModTime = ModTime;
  NewModule->Buffer = Contents;
  // Initialize the stream.
  NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);

  // Check the file properties first. Then read the signature eagerly so that
  // we can check it, but avoid calling ReadSignature unless there's something
  // to check.
  if (isModuleFileOutOfDate(Size, ModTime, ExpectedSize, ExpectedModTime,
                            Outcome) ||
      (ExpectedSignature && checkSignature(ReadSignature(NewModule->Data),
                                           ExpectedSignature, Outcome))) {
    Outcome.setOutOfDate(NewModule->InputFilesValidationStatus);
    return Outcome;
  }

  // A buffer we read ourselves is handed to the in-memory module cache now
  // that the module has been accepted.
  if (FreshlyRead)
    getModuleCache().getInMemoryModuleCache().addPCM(
        FileName, std::move(FreshlyRead), Size, ModTime);

  // We're keeping this module. Store it in the map.
  ModuleFile *Added = NewModule.get();
  Modules[*Key] = Added;
  Outcome.Module = Added;

  updateModuleImports(*NewModule, ImportedBy, ImportLoc);

  if (!NewModule->isModule())
    PCHChain.push_back(Added);
  if (!ImportedBy)
    Roots.push_back(Added);

  Chain.push_back(std::move(NewModule));
  Outcome.K = AddModuleResult::NewlyLoaded;
  return Outcome;
}
/// Remove the module files in the range [\p First, end()) from this manager.
///
/// Before the modules are destroyed, every structure in this class that may
/// point at one of them is purged: the cached visitation order, the import
/// edges of the surviving modules, the root list, the list of modules shared
/// with the global module index, the PCH chain and the key-to-module map.
///
/// \param First iterator to the first module to remove. Nothing happens when
/// it equals end().
void ModuleManager::removeModules(ModuleIterator First) {
  auto Last = end();
  if (First == Last)
    return;

  // Explicitly clear VisitOrder since we might not notice it is stale.
  VisitOrder.clear();

  // Collect the set of module file pointers that we'll be removing.
  llvm::SmallPtrSet<ModuleFile *, 4> Victims(
      (llvm::pointer_iterator<ModuleIterator>(First)),
      (llvm::pointer_iterator<ModuleIterator>(Last)));

  auto IsVictim = [&](ModuleFile *MF) { return Victims.count(MF); };

  // Remove any references to the soon-to-be-destroyed modules from the
  // modules that survive.
  for (auto I = begin(); I != First; ++I) {
    I->Imports.remove_if(IsVictim);
    I->ImportedBy.remove_if(IsVictim);
  }
  llvm::erase_if(Roots, IsVictim);

  // visit() dereferences every entry of this list, so it must not keep
  // pointers to modules that are about to be destroyed.
  llvm::erase_if(ModulesInCommonWithGlobalIndex, IsVictim);

  // Remove the modules from the PCH chain: drop the first non-module victim
  // and everything that follows it in the chain.
  for (auto I = First; I != Last; ++I) {
    if (!I->isModule()) {
      PCHChain.erase(llvm::find(PCHChain, &*I), PCHChain.end());
      break;
    }
  }

  // Unregister the modules from the lookup table...
  for (ModuleIterator Victim = First; Victim != Last; ++Victim)
    Modules.erase(Victim->FileKey);

  // ...and finally delete them.
  Chain.erase(Chain.begin() + (First - begin()), Chain.end());
}
/// Register \p Buffer as the contents of the module file \p FileName.
///
/// A virtual file entry with the buffer's size (and a zero third argument) is
/// requested from the FileManager, and the buffer is stored under that entry
/// for lookupBuffer() to hand out later.
void ModuleManager::addInMemoryBuffer(
    StringRef FileName, std::unique_ptr<llvm::MemoryBuffer> Buffer) {
  // Read the size before the buffer is moved into the table.
  const auto BufferSize = Buffer->getBufferSize();
  FileEntryRef VirtualFile =
      FileMgr.getVirtualFileRef(FileName, BufferSize, 0);
  InMemoryBuffers[VirtualFile] = std::move(Buffer);
}
/// Obtain a visit state for one traversal.
///
/// The head of the list of cached states is detached and returned when the
/// list is non-empty; otherwise a new state sized for the current number of
/// modules is created.
std::unique_ptr<ModuleManager::VisitState> ModuleManager::allocateVisitState() {
  // Nothing cached: allocate and return a new state.
  if (!FirstVisitState)
    return std::make_unique<VisitState>(size());

  // Fast path: pop the head of the cached list.
  auto Reused = std::move(FirstVisitState);
  FirstVisitState = std::move(Reused->NextState);
  return Reused;
}
/// Give a visit state back so that a later traversal can reuse it.
///
/// \p State must not already be linked into the list of cached states; it
/// becomes the new head of that list.
void ModuleManager::returnVisitState(std::unique_ptr<VisitState> State) {
  assert(State->NextState == nullptr && "Visited state is in list?");

  // Push onto the front: the old head becomes the successor of State.
  auto PreviousHead = std::move(FirstVisitState);
  State->NextState = std::move(PreviousHead);
  FirstVisitState = std::move(State);
}
/// Install (or, with a null \p Index, remove) the global module index.
///
/// Removing the index empties the list of modules in common with it.
/// Installing one notifies it about every module loaded so far; each module
/// for which loadedModuleFile() returns false is appended to that list.
void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) {
  GlobalIndex = Index;

  if (GlobalIndex) {
    // Notify the global module index about all of the modules we've already
    // loaded.
    for (ModuleFile &Loaded : *this) {
      if (GlobalIndex->loadedModuleFile(&Loaded))
        continue;
      ModulesInCommonWithGlobalIndex.push_back(&Loaded);
    }
    return;
  }

  ModulesInCommonWithGlobalIndex.clear();
}
/// Tell the global module index, if one is installed, about the module file
/// \p MF.
///
/// \p MF is appended to the list of modules in common with the index when
/// loadedModuleFile() returns false for it. Without an index this is a no-op.
void ModuleManager::moduleFileAccepted(ModuleFile *MF) {
  if (GlobalIndex && !GlobalIndex->loadedModuleFile(MF))
    ModulesInCommonWithGlobalIndex.push_back(MF);
}
/// Construct a module manager with no modules loaded.
///
/// Each same-named member is initialized from the corresponding argument; the
/// constructor body is empty, so no other work is performed here.
///
/// \param FileMgr file manager used to resolve module file names and buffers.
/// \param ModCache module cache queried for directories, timestamps and
/// implicit module contents.
/// \param PCHContainerRdr reader used to extract the PCH data from a buffer.
/// \param HeaderSearchInfo header search whose module map is consulted by
/// lookupByModuleName().
ModuleManager::ModuleManager(FileManager &FileMgr, ModuleCache &ModCache,
const PCHContainerReader &PCHContainerRdr,
const HeaderSearch &HeaderSearchInfo)
: FileMgr(FileMgr), ModCache(ModCache), PCHContainerRdr(PCHContainerRdr),
HeaderSearchInfo(HeaderSearchInfo) {}
/// Visit the loaded module files so that each module is visited before the
/// modules it imports.
///
/// \param Visitor callback invoked for each visited module. When it returns
/// true, every module reachable through the current module's imports is
/// marked as visited and will therefore be skipped.
/// \param ModuleFilesHit optional hit-set from the global module index. When
/// provided, every module in common with the global module index that is not
/// in the set is treated as already visited.
void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
                          llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit) {
  // Recompute the visitation order whenever its size disagrees with the
  // number of loaded modules.
  if (VisitOrder.size() != Chain.size()) {
    unsigned N = size();
    VisitOrder.clear();
    VisitOrder.reserve(N);

    // Count the importers of every module. Modules nobody imports seed the
    // worklist.
    SmallVector<ModuleFile *, 4> Worklist;
    Worklist.reserve(N);
    llvm::SmallVector<unsigned, 4> PendingImporters;
    PendingImporters.resize(N);

    for (ModuleFile &MF : llvm::reverse(*this)) {
      unsigned NumImporters = MF.ImportedBy.size();
      PendingImporters[MF.Index] = NumImporters;
      if (NumImporters == 0)
        Worklist.push_back(&MF);
    }

    // Traverse the graph, making sure to visit a module before visiting any
    // of its dependencies.
    while (!Worklist.empty()) {
      ModuleFile *Importer = Worklist.pop_back_val();
      VisitOrder.push_back(Importer);

      // Importer no longer blocks the modules it imports. A module becomes
      // ready once its last pending importer has been placed in the order.
      for (ModuleFile *Imported : llvm::reverse(Importer->Imports)) {
        unsigned &Pending = PendingImporters[Imported->Index];
        if (Pending == 0)
          continue;
        if (--Pending == 0)
          Worklist.push_back(Imported);
      }
    }

    assert(VisitOrder.size() == N && "Visitation order is wrong?");

    // Discard the cached visit states along with the old order.
    FirstVisitState = nullptr;
  }

  auto State = allocateVisitState();
  unsigned VisitNumber = State->NextVisitNumber++;

  // If the caller has provided us with a hit-set that came from the global
  // module index, mark every module file in common with the global module
  // index that is *not* in that set as 'visited'.
  if (ModuleFilesHit)
    for (ModuleFile *Indexed : ModulesInCommonWithGlobalIndex)
      if (!ModuleFilesHit->count(Indexed))
        State->VisitNumber[Indexed->Index] = VisitNumber;

  for (unsigned Pos = 0, NumOrdered = VisitOrder.size(); Pos != NumOrdered;
       ++Pos) {
    ModuleFile *CurrentModule = VisitOrder[Pos];

    // Should we skip this module file?
    if (State->VisitNumber[CurrentModule->Index] == VisitNumber)
      continue;

    // Visit the module.
    assert(State->VisitNumber[CurrentModule->Index] == VisitNumber - 1);
    State->VisitNumber[CurrentModule->Index] = VisitNumber;
    if (!Visitor(*CurrentModule))
      continue;

    // The visitor has requested that we cut off visitation of any module
    // that the current module depends on. To indicate this behavior, we mark
    // all of the reachable modules as having been visited.
    ModuleFile *NextModule = CurrentModule;
    while (true) {
      // Mark and queue each import that has not been marked in this visit.
      for (ModuleFile *Imported : NextModule->Imports) {
        unsigned &Mark = State->VisitNumber[Imported->Index];
        if (Mark == VisitNumber)
          continue;
        State->Stack.push_back(Imported);
        Mark = VisitNumber;
      }

      if (State->Stack.empty())
        break;

      // Pop the next module off the stack.
      NextModule = State->Stack.pop_back_val();
    }
  }

  returnVisitState(std::move(State));
}
#ifndef NDEBUG
namespace llvm {

/// Exposes the module manager as a graph: nodes are the loaded module files
/// and a node's children are the module files it imports.
template <> struct GraphTraits<ModuleManager> {
  using NodeRef = ModuleFile *;
  using ChildIteratorType = llvm::SetVector<ModuleFile *>::const_iterator;
  using nodes_iterator = pointer_iterator<ModuleManager::ModuleConstIterator>;

  static ChildIteratorType child_begin(NodeRef N) {
    return N->Imports.begin();
  }

  static ChildIteratorType child_end(NodeRef N) { return N->Imports.end(); }

  static nodes_iterator nodes_begin(const ModuleManager &MM) {
    return nodes_iterator(MM.begin());
  }

  static nodes_iterator nodes_end(const ModuleManager &MM) {
    return nodes_iterator(MM.end());
  }
};

/// DOT rendering options for the module graph: drawn bottom-up, with each
/// node labelled by its module name.
template <> struct DOTGraphTraits<ModuleManager> : public DefaultDOTGraphTraits {
  explicit DOTGraphTraits(bool IsSimple = false)
      : DefaultDOTGraphTraits(IsSimple) {}

  static bool renderGraphFromBottomUp() { return true; }

  std::string getNodeLabel(ModuleFile *M, const ModuleManager &) {
    return M->ModuleName;
  }
};

} // namespace llvm

/// Display the module graph (only available in builds without NDEBUG).
void ModuleManager::viewGraph() {
  llvm::ViewGraph(*this, "Modules");
}
#endif