[clangd] [C++20] [Modules] Introduce GC for clangd built modules (#193973)

This patch introduces a simple GC for clangd-built module files to keep
the clangd-built module cache from growing without bound.

The strategy: within a clangd-built module file cache, if a module file
(we assume every PCM file in the clangd cache was built by clangd) has
not been accessed for a given period (3 days by default, controlled by
--modules-builder-versioned-gc-threshold-seconds), clangd removes it.

The strategy is not perfect; e.g., I have heard that on some systems
atime updates are disabled or simply not performed. But given the
trade-off between usability and maintainability, I feel the current
strategy is fine.

AI assisted.
This commit is contained in:
Chuanqi Xu
2026-04-24 22:02:13 +08:00
committed by GitHub
parent 771440f5bb
commit c49b1773b2
2 changed files with 258 additions and 0 deletions

View File

@@ -36,6 +36,12 @@ llvm::cl::opt<bool> DebugModulesBuilder(
"Remember to remove them later after debugging."),
llvm::cl::init(false));
// Age threshold, in seconds, consulted by garbageCollectModuleCache(): a
// cached module file whose last access time lies further in the past than
// this value is deleted. The default is three days (3 * 24 * 60 * 60 seconds).
llvm::cl::opt<unsigned> VersionedModuleFileGCThresholdSeconds(
"modules-builder-versioned-gc-threshold-seconds",
llvm::cl::desc("Delete versioned copy-on-read module files whose last "
"access time is older than this many seconds."),
llvm::cl::init(3 * 24 * 60 * 60));
//===----------------------------------------------------------------------===//
// Persistent Module Cache Layout.
//
@@ -947,6 +953,57 @@ llvm::SmallVector<std::string> getAllRequiredModules(PathRef RequiredSource,
return ModuleNames;
}
/// Scans one cache root and returns the paths of all `.pcm` files under it.
///
/// The directory tree rooted at \p CacheRoot is walked recursively and every
/// entry whose extension is `.pcm` is collected. A cache root that does not
/// exist is not treated as an error and yields an empty result. Any other
/// traversal failure is logged, and the files found before the failure are
/// still returned.
std::vector<std::string> collectModuleFiles(PathRef CacheRoot) {
  std::vector<std::string> Result;
  std::error_code EC;
  for (llvm::sys::fs::recursive_directory_iterator It(CacheRoot, EC), End;
       It != End && !EC; It.increment(EC)) {
    if (llvm::sys::path::extension(It->path()) != ".pcm")
      continue;
    Result.push_back(It->path());
  }
  // A missing directory only means there is nothing to collect, so it is not
  // worth a log entry; every other error is reported.
  if (EC && EC != std::errc::no_such_file_or_directory)
    log("Failed to scan module cache directory {0}: {1}", CacheRoot,
        EC.message());
  return Result;
}
/// Performs one GC pass over a persistent module cache root.
///
/// Every `.pcm` file found by collectModuleFiles() under \p CacheRoot is
/// stat'ed; a file is deleted when the whole seconds elapsed since its last
/// access exceed VersionedModuleFileGCThresholdSeconds. Stat and removal
/// failures are logged and do not stop the pass.
void garbageCollectModuleCache(PathRef CacheRoot) {
  const std::vector<std::string> Candidates = collectModuleFiles(CacheRoot);
  for (const std::string &PCMPath : Candidates) {
    llvm::sys::fs::file_status FileInfo;
    std::error_code StatError = llvm::sys::fs::status(PCMPath, FileInfo);
    if (StatError) {
      log("Failed to stat cached module file {0} for GC: {1}", PCMPath,
          StatError.message());
      continue;
    }

    const llvm::sys::TimePoint<> AccessedAt = FileInfo.getLastAccessedTime();
    const llvm::sys::TimePoint<> CurrentTime = std::chrono::system_clock::now();
    // An access time in the future cannot be aged sensibly; leave the file.
    if (AccessedAt > CurrentTime)
      continue;

    const auto IdleFor = std::chrono::duration_cast<std::chrono::seconds>(
        CurrentTime - AccessedAt);
    const auto MaxIdle =
        std::chrono::seconds(VersionedModuleFileGCThresholdSeconds);
    // Keep files that are still within the threshold, and skip files that
    // have disappeared since they were listed.
    if (IdleFor <= MaxIdle || !llvm::sys::fs::exists(PCMPath))
      continue;

    constexpr llvm::StringLiteral Reason = "file older than GC threshold";
    std::error_code RemoveError = llvm::sys::fs::remove(PCMPath);
    if (RemoveError) {
      log("Failed to remove cached module file {0} ({1}): {2}", PCMPath,
          Reason, RemoveError.message());
      continue;
    }
    log("Removed cached module file {0} ({1})", PCMPath, Reason);
  }
}
} // namespace
class ModulesBuilder::ModulesBuilderImpl {
@@ -969,10 +1026,39 @@ private:
const ThreadsafeFS &TFS,
ReusablePrerequisiteModules &BuiltModuleFiles);
/// Runs GC once for the cache root owning a project root.
void garbageCollectModuleCacheForProjectRoot(PathRef ProjectRoot);
ModuleFileCache Cache;
ModuleNameToSourceCache ProjectModulesCache;
std::mutex GarbageCollectedProjectRootsMutex;
llvm::StringSet<> GarbageCollectedProjectRoots;
};
/// Runs the module cache GC for \p ProjectRoot, at most once per distinct
/// (normalized) project root seen by this ModulesBuilderImpl.
///
/// The cache root scanned is `<ProjectRoot>/.cache/clangd/modules`. An empty
/// \p ProjectRoot is ignored.
void ModulesBuilder::ModulesBuilderImpl::
    garbageCollectModuleCacheForProjectRoot(PathRef ProjectRoot) {
  if (ProjectRoot.empty())
    return;

  std::string RootKey = normalizePathForCache(ProjectRoot);
  bool AlreadyHandled = false;
  {
    // A project root that is already recorded in GarbageCollectedProjectRoots
    // means a GC pass for its cache root has been started before.
    std::lock_guard<std::mutex> Guard(GarbageCollectedProjectRootsMutex);
    AlreadyHandled = !GarbageCollectedProjectRoots.insert(RootKey).second;
  }
  if (AlreadyHandled)
    return;

  llvm::SmallString<256> ModulesCacheDir(ProjectRoot);
  llvm::sys::path::append(ModulesCacheDir, ".cache", "clangd", "modules");

  log("Running GC pass for clangd built module files under {0} with age "
      "threshold {1} seconds (adjust with --modules-builder-versioned-gc-"
      "threshold-seconds)",
      ModulesCacheDir, VersionedModuleFileGCThresholdSeconds);
  garbageCollectModuleCache(ModulesCacheDir);
  log("Done running GC pass for clangd built module files under {0}",
      ModulesCacheDir);
}
void ModulesBuilder::ModulesBuilderImpl::getPrebuiltModuleFile(
StringRef ModuleName, PathRef ModuleUnitFileName, const ThreadsafeFS &TFS,
ReusablePrerequisiteModules &BuiltModuleFiles) {
@@ -1053,6 +1139,9 @@ llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
if (!Cmd)
return llvm::createStringError(
llvm::formatv("No compile command for {0}", ReqFileName));
if (auto PI = getCDB().getProjectInfo(ReqFileName);
PI && !PI->SourceRoot.empty())
garbageCollectModuleCacheForProjectRoot(PI->SourceRoot);
const std::string CommandHash = getCompileCommandStringHash(*Cmd);
const std::string PublishedModuleFilePath = getPublishedModuleFilePath(

View File

@@ -20,9 +20,12 @@
#include "support/Path.h"
#include "support/ThreadsafeFS.h"
#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
@@ -1346,6 +1349,172 @@ export int AValue = MValue;
EXPECT_TRUE(llvm::sys::fs::exists(LockPath));
}
// A `.pcm` file placed next to the module file reported for M, with its
// access and modification times moved five days into the past, must be gone
// after a second ModulesBuilder builds the prerequisites of A again.
TEST_F(PrerequisiteModulesTests,
       PersistentModuleCacheGCRemovesOldStablePublishedModule) {
  PerFileModulesCompilationDatabase CDB(TestDir, FS);
  CDB.addFile("M.cppm", R"cpp(
export module M;
export int MValue = 43;
)cpp");
  CDB.addFile("A.cppm", R"cpp(
export module A;
import M;
export int AValue = MValue;
)cpp");

  llvm::SmallString<256> StalePCM;
  {
    // First builder: populate the cache and learn where M's module file is.
    ModulesBuilder FirstSession(CDB);
    auto Prerequisites =
        FirstSession.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
    ASSERT_TRUE(Prerequisites);
    HeaderSearchOptions SearchOpts(TestDir);
    Prerequisites->adjustHeaderSearchOptions(SearchOpts);
    ASSERT_EQ(SearchOpts.PrebuiltModuleFiles.count("M"), 1u);

    // Create a sibling file named Orphan.pcm in the same directory.
    StalePCM = SearchOpts.PrebuiltModuleFiles["M"];
    llvm::sys::path::remove_filename(StalePCM);
    llvm::sys::path::append(StalePCM, "Orphan.pcm");
    std::error_code WriteError;
    llvm::raw_fd_ostream Out(StalePCM, WriteError);
    ASSERT_FALSE(WriteError);
    Out << "orphan";
    Out.close();
    EXPECT_TRUE(llvm::sys::fs::exists(StalePCM));

    // Backdate the file through a descriptor that is closed on scope exit.
    int Handle = -1;
    ASSERT_FALSE(llvm::sys::fs::openFileForWrite(
        StalePCM, Handle, llvm::sys::fs::CD_OpenExisting,
        llvm::sys::fs::OF_None));
    auto CloseHandle = llvm::scope_exit(
        [&] { llvm::sys::Process::SafelyCloseFileDescriptor(Handle); });
    const llvm::sys::TimePoint<> FiveDaysAgo =
        std::chrono::system_clock::now() - std::chrono::hours(24 * 5);
    ASSERT_FALSE(
        llvm::sys::fs::setLastAccessAndModificationTime(Handle, FiveDaysAgo));
  }

  // Second builder: building again is expected to remove the stale file.
  ModulesBuilder SecondSession(CDB);
  auto Rebuilt =
      SecondSession.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
  ASSERT_TRUE(Rebuilt);
  EXPECT_FALSE(llvm::sys::fs::exists(StalePCM));
}
// A freshly written `.pcm` file placed next to the module file reported for M
// must still exist after a second ModulesBuilder builds the prerequisites of
// A again.
TEST_F(PrerequisiteModulesTests,
       PersistentModuleCacheGCKeepsRecentStablePublishedModule) {
  PerFileModulesCompilationDatabase CDB(TestDir, FS);
  CDB.addFile("M.cppm", R"cpp(
export module M;
export int MValue = 43;
)cpp");
  CDB.addFile("A.cppm", R"cpp(
export module A;
import M;
export int AValue = MValue;
)cpp");

  llvm::SmallString<256> FreshPCM;
  {
    // First builder: populate the cache and learn where M's module file is.
    ModulesBuilder FirstSession(CDB);
    auto Prerequisites =
        FirstSession.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
    ASSERT_TRUE(Prerequisites);
    HeaderSearchOptions SearchOpts(TestDir);
    Prerequisites->adjustHeaderSearchOptions(SearchOpts);
    ASSERT_EQ(SearchOpts.PrebuiltModuleFiles.count("M"), 1u);

    // Create a sibling file named Orphan.pcm and leave its timestamps alone.
    FreshPCM = SearchOpts.PrebuiltModuleFiles["M"];
    llvm::sys::path::remove_filename(FreshPCM);
    llvm::sys::path::append(FreshPCM, "Orphan.pcm");
    std::error_code WriteError;
    llvm::raw_fd_ostream Out(FreshPCM, WriteError);
    ASSERT_FALSE(WriteError);
    Out << "orphan";
    Out.close();
    EXPECT_TRUE(llvm::sys::fs::exists(FreshPCM));
  }

  // Second builder: the recently written file must survive.
  ModulesBuilder SecondSession(CDB);
  auto Rebuilt =
      SecondSession.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
  ASSERT_TRUE(Rebuilt);
  EXPECT_TRUE(llvm::sys::fs::exists(FreshPCM));
}
// The module file reported for M, once its access and modification times are
// moved five days into the past, must no longer exist at that path after a
// second ModulesBuilder builds the prerequisites of A again.
TEST_F(PrerequisiteModulesTests,
       PersistentModuleCacheGCRemovesOldVersionedModuleFile) {
  PerFileModulesCompilationDatabase CDB(TestDir, FS);
  CDB.addFile("M.cppm", R"cpp(
export module M;
export int MValue = 43;
)cpp");
  CDB.addFile("A.cppm", R"cpp(
export module A;
import M;
export int AValue = MValue;
)cpp");

  llvm::SmallString<256> AgedPCM;
  {
    // First builder: build once and record the module file used for M.
    ModulesBuilder FirstSession(CDB);
    auto Prerequisites =
        FirstSession.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
    ASSERT_TRUE(Prerequisites);
    HeaderSearchOptions SearchOpts(TestDir);
    Prerequisites->adjustHeaderSearchOptions(SearchOpts);
    ASSERT_EQ(SearchOpts.PrebuiltModuleFiles.count("M"), 1u);
    AgedPCM = SearchOpts.PrebuiltModuleFiles["M"];
    ASSERT_TRUE(llvm::sys::fs::exists(AgedPCM));

    // Backdate the file through a descriptor that is closed on scope exit.
    int Handle = -1;
    ASSERT_FALSE(llvm::sys::fs::openFileForWrite(
        AgedPCM, Handle, llvm::sys::fs::CD_OpenExisting,
        llvm::sys::fs::OF_None));
    auto CloseHandle = llvm::scope_exit(
        [&] { llvm::sys::Process::SafelyCloseFileDescriptor(Handle); });
    const llvm::sys::TimePoint<> FiveDaysAgo =
        std::chrono::system_clock::now() - std::chrono::hours(24 * 5);
    ASSERT_FALSE(
        llvm::sys::fs::setLastAccessAndModificationTime(Handle, FiveDaysAgo));
  }

  // Second builder: the backdated file is expected to be removed.
  ModulesBuilder SecondSession(CDB);
  auto Rebuilt =
      SecondSession.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
  ASSERT_TRUE(Rebuilt);
  EXPECT_FALSE(llvm::sys::fs::exists(AgedPCM));
}
// The module file reported for M by a first builder, with untouched
// timestamps, must still exist after a second ModulesBuilder builds the
// prerequisites of A while the first builder is still alive.
TEST_F(PrerequisiteModulesTests,
       PersistentModuleCacheGCKeepsRecentVersionedModuleFile) {
  PerFileModulesCompilationDatabase CDB(TestDir, FS);
  CDB.addFile("M.cppm", R"cpp(
export module M;
export int MValue = 43;
)cpp");
  CDB.addFile("A.cppm", R"cpp(
export module A;
import M;
export int AValue = MValue;
)cpp");

  // First builder: heap-allocated and kept alive until the end of the test.
  auto InitialBuilder = std::make_unique<ModulesBuilder>(CDB);
  auto InitialInfo =
      InitialBuilder->buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
  ASSERT_TRUE(InitialInfo);

  HeaderSearchOptions SearchOpts(TestDir);
  InitialInfo->adjustHeaderSearchOptions(SearchOpts);
  ASSERT_EQ(SearchOpts.PrebuiltModuleFiles.count("M"), 1u);
  // Refers to the string stored in SearchOpts, which outlives every use below.
  llvm::StringRef RecentPCM = SearchOpts.PrebuiltModuleFiles["M"];
  ASSERT_TRUE(llvm::sys::fs::exists(RecentPCM));

  // Second builder: the recently produced file must survive.
  ModulesBuilder FollowUpBuilder(CDB);
  auto FollowUpInfo =
      FollowUpBuilder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS);
  ASSERT_TRUE(FollowUpInfo);
  EXPECT_TRUE(llvm::sys::fs::exists(RecentPCM));
}
TEST_F(PrerequisiteModulesTests,
PersistentModuleCacheIgnoresRequiredSourceForOnDiskPath) {
ModuleUnitRootCompilationDatabase CDB(TestDir, FS);