[clangd] Add background index path remapping tool (#185482)

Introduce a standalone tool to remap paths inside clangd background
index files. This allows users to pay the cost of generating a full
background index once, then reuse the background index across multiple
clients. Each client rewrites the background index in place after
copying the original, specifying the path(s) to be remapped.

This is an alternative approach to the dynamic path remapping proposed
in #180285.

Fixes clangd/clangd#847

Assisted-by: claude
This commit is contained in:
Justin Cady
2026-03-24 11:21:06 -04:00
committed by GitHub
parent 4a2b384f15
commit c1ea10a854
5 changed files with 382 additions and 0 deletions

View File

@@ -201,6 +201,7 @@ if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
endif()
add_subdirectory(tool)
add_subdirectory(indexer)
add_subdirectory(remap)
if (LLVM_INCLUDE_BENCHMARKS)
add_subdirectory(benchmarks)

View File

@@ -0,0 +1,17 @@
# Build configuration for the standalone clangd-remap tool.
# Only the LLVM Support component is requested here.
set(LLVM_LINK_COMPONENTS
Support
)
# The executable is built from the single source file RemapMain.cpp.
add_clang_executable(clangd-remap
RemapMain.cpp
)
# clangBasic is linked through clang_target_link_libraries.
clang_target_link_libraries(clangd-remap
PRIVATE
clangBasic
)
# The clangd libraries are linked with plain target_link_libraries.
target_link_libraries(clangd-remap
PRIVATE
clangDaemon
clangdSupport
)

View File

@@ -0,0 +1,345 @@
//===--- RemapMain.cpp - Remap paths in background index shards -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// clangd-remap is a standalone tool that rewrites paths inside every .idx shard
// in a background index directory. An index generated on one machine (or at one
// workspace path) can be remapped and reused within a source tree at a
// different location.
//
// Usage:
// clangd-remap --path-mappings=/old/root=/new/root /path/to/index-dir
//
//===----------------------------------------------------------------------===//
#include "Headers.h"
#include "PathMapping.h"
#include "SourceCode.h"
#include "URI.h"
#include "index/Ref.h"
#include "index/Serialization.h"
#include "index/Symbol.h"
#include "support/Logger.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
#include <atomic>
#include <string>
#include <vector>
namespace clang {
namespace clangd {
namespace {
// Category that groups every option of this tool; main() hides all options
// that do not belong to it.
static llvm::cl::OptionCategory RemapCategory("clangd-remap options");
// Required: the old=new path pairs to apply. main() hands this string to
// parsePathMappings() and exits with an error if it is invalid or empty.
static llvm::cl::opt<std::string> PathMappingsArg{
"path-mappings",
llvm::cl::cat(RemapCategory),
llvm::cl::desc(
"List of path mappings applied to every string in each background "
"index shard. Format: /old/path=/new/path[,/old2=/new2,...]"),
llvm::cl::Required,
};
// Required positional argument: the directory that is scanned recursively for
// .idx shard files.
static llvm::cl::opt<std::string> IndexDir{
llvm::cl::desc("<index-dir>"),
llvm::cl::cat(RemapCategory),
llvm::cl::Positional,
llvm::cl::Required,
};
// Optional worker-thread count. Defaults to 0; main() only overrides the
// parallel strategy when a non-zero value is given.
static llvm::cl::opt<unsigned> NumThreads{
"j",
llvm::cl::cat(RemapCategory),
llvm::cl::desc("Number of worker threads (0 = all)"),
llvm::cl::init(0),
};
// Minimum severity of the messages the stderr logger emits. Defaults to
// Logger::Info.
static llvm::cl::opt<Logger::Level> LogLevel{
"log",
llvm::cl::cat(RemapCategory),
llvm::cl::desc("Verbosity of log messages written to stderr"),
llvm::cl::values(
clEnumValN(Logger::Error, "error", "Error messages only"),
clEnumValN(Logger::Info, "info", "High level execution tracing"),
clEnumValN(Logger::Debug, "verbose", "Low level details")),
llvm::cl::init(Logger::Info),
};
// Apply the first matching path mapping to a URI or to a string that embeds an
// absolute POSIX path.
//
// Strings that begin with "file://" are delegated to doPathMapping(). For any
// other string (compilation flags, directory paths, etc.) only the path that
// begins at the first '/' is considered, so a leading flag such as "-I" is
// preserved in the result.
//
// Ex. given "-I/old/root/include" and mapping /old/root=/new/root, the result
// is "-I/new/root/include"
//
// Returns the rewritten string, or std::nullopt when no mapping applies.
// FIXME: This does not handle Windows paths; only POSIX paths are supported.
std::optional<std::string> remapString(llvm::StringRef S,
                                       const PathMappings &Mappings) {
  // Client = old path, Server = new path; ClientToServer maps old -> new
  if (S.starts_with("file://"))
    return doPathMapping(S, PathMapping::Direction::ClientToServer, Mappings);

  const size_t FirstSlash = S.find('/');
  if (FirstSlash == llvm::StringRef::npos)
    return std::nullopt;

  // Split once into the untouched prefix (e.g. "-I") and the path candidate.
  const llvm::StringRef Prefix = S.take_front(FirstSlash);
  const llvm::StringRef PathPart = S.drop_front(FirstSlash);
  for (const auto &Mapping : Mappings) {
    // A mapping is only accepted at the first '/', so a prefix comparison at
    // that position is sufficient; there is no need to search the whole string
    // once per mapping.
    // NOTE(review): assumes parsePathMappings() never yields an empty client
    // path -- confirm.
    llvm::StringRef After = PathPart;
    if (!After.consume_front(Mapping.ClientPath))
      continue;
    // Ensure a full path-component match: "/old" must not match "/older"
    if (After.empty() || After.front() == '/')
      return (Prefix + Mapping.ServerPath + After).str();
  }
  return std::nullopt;
}
// Rewrite S in place when a path mapping applies to it. The rewritten text is
// stored through Saver so that S keeps pointing at live storage; when nothing
// matches, S is left untouched.
void remapRef(llvm::StringRef &S, const PathMappings &Mappings,
              llvm::StringSaver &Saver) {
  auto Mapped = remapString(S, Mappings);
  if (!Mapped)
    return;
  S = Saver.save(*Mapped);
}
// Variant of remapRef that _always_ re-homes the string in Saver, whether or
// not a mapping matched. Use it for StringRefs whose original backing storage
// is about to go away.
void remapOrCopyRef(llvm::StringRef &S, const PathMappings &Mappings,
                    llvm::StringSaver &Saver) {
  auto Mapped = remapString(S, Mappings);
  // Either the remapped text or the unchanged original gets saved.
  S = Saver.save(Mapped ? llvm::StringRef(*Mapped) : S);
}
// Rewrite a C-string URI/path in place. When a mapping applies, P is pointed
// at a copy of the remapped text stored through Saver; otherwise P is left
// unchanged.
void remapCharURI(const char *&P, const PathMappings &Mappings,
                  llvm::StringSaver &Saver) {
  auto Mapped = remapString(llvm::StringRef(P), Mappings);
  if (!Mapped)
    return;
  P = Saver.save(*Mapped).data();
}
// Rewrite an owning std::string in place when a mapping applies; leave it
// untouched otherwise.
void remapStdStr(std::string &S, const PathMappings &Mappings) {
  auto Mapped = remapString(S, Mappings);
  if (Mapped.has_value())
    S = std::move(Mapped).value();
}
// Walk Dir recursively and return the path of every entry whose extension is
// ".idx". If the walk reports a filesystem error it stops, the error is
// logged, and whatever was gathered up to that point is returned.
std::vector<std::string> collectShards(llvm::StringRef Dir) {
  std::vector<std::string> Shards;
  std::error_code EC;
  llvm::sys::fs::recursive_directory_iterator Walker(Dir, EC);
  llvm::sys::fs::recursive_directory_iterator Done;
  while (Walker != Done && !EC) {
    const std::string &Candidate = Walker->path();
    if (llvm::sys::path::extension(Candidate) == ".idx")
      Shards.push_back(Candidate);
    Walker.increment(EC);
  }
  if (EC)
    elog("Error scanning directory {0}: {1}", Dir, EC.message());
  return Shards;
}
// Build the shard filename for a source path:
//   <basename>.<hex digest of the full path>.idx
// (See getShardPathFromFilePath() in BackgroundIndexStorage.cpp.)
std::string shardName(llvm::StringRef SourceFilePath) {
  std::string Name = llvm::sys::path::filename(SourceFilePath).str();
  Name += ".";
  Name += llvm::toHex(digest(SourceFilePath));
  Name += ".idx";
  return Name;
}
// Work out the filename a shard should have once its paths are remapped.
//
// Every source entry's URI is resolved to an absolute path and the shard name
// for that path is computed. The first entry whose shard name equals
// OldFilename identifies the file this shard belongs to; the mappings are
// applied to that path and the shard name of the result is returned. Entries
// whose URI cannot be parsed or resolved are skipped. If there are no sources
// or no entry matches, OldFilename is returned unchanged.
//
// This must be called before remapIndexData(), since it needs the original
// (not remapped) URIs.
std::string deriveNewFilename(const IndexFileIn &Data,
                              llvm::StringRef OldFilename,
                              const PathMappings &Mappings) {
  if (Data.Sources) {
    for (const auto &Source : *Data.Sources) {
      auto ParsedURI = URI::parse(Source.first());
      if (!ParsedURI) {
        llvm::consumeError(ParsedURI.takeError());
        continue;
      }
      auto AbsPath = URI::resolve(*ParsedURI);
      if (!AbsPath) {
        llvm::consumeError(AbsPath.takeError());
        continue;
      }
      if (shardName(*AbsPath) != OldFilename)
        continue;
      // This is the file the shard was generated for.
      std::string Remapped = *AbsPath;
      remapStdStr(Remapped, Mappings);
      return shardName(Remapped);
    }
  }
  return OldFilename.str();
}
// Apply the path mappings to every path-bearing field of a parsed IndexFileIn,
// in place. Saver provides storage for rewritten strings held as StringRef or
// raw pointers.
//
// Fields touched: symbol declaration/definition URIs and include headers,
// reference location URIs, include-graph URIs and direct includes, and the
// compile command's directory, arguments and filename.
void remapIndexData(IndexFileIn &Data, const PathMappings &Mappings,
                    llvm::StringSaver &Saver) {
  if (Data.Symbols) {
    // SymbolSlab is immutable, so a fresh slab is built from edited copies.
    SymbolSlab::Builder SymbolBuilder;
    for (const auto &Original : *Data.Symbols) {
      Symbol Edited = Original;
      remapCharURI(Edited.CanonicalDeclaration.FileURI, Mappings, Saver);
      remapCharURI(Edited.Definition.FileURI, Mappings, Saver);
      for (auto &Header : Edited.IncludeHeaders)
        remapRef(Header.IncludeHeader, Mappings, Saver);
      SymbolBuilder.insert(Edited);
    }
    Data.Symbols = std::move(SymbolBuilder).build();
  }

  if (Data.Refs) {
    RefSlab::Builder RefBuilder;
    for (const auto &PerSymbol : *Data.Refs) {
      for (const auto &Original : PerSymbol.second) {
        Ref Edited = Original; // mutable copy
        remapCharURI(Edited.Location.FileURI, Mappings, Saver);
        RefBuilder.insert(PerSymbol.first, Edited);
      }
    }
    Data.Refs = std::move(RefBuilder).build();
  }

  // The include graph is keyed by URI, and keys may change, so the map is
  // rebuilt. Every StringRef field (URI, DirectIncludes) is saved into Saver
  // because the old StringMap is destroyed when it is replaced below.
  if (Data.Sources) {
    IncludeGraph Rebuilt;
    for (auto &Old : *Data.Sources) {
      IncludeGraphNode Node = Old.getValue();
      remapOrCopyRef(Node.URI, Mappings, Saver);
      for (auto &Include : Node.DirectIncludes)
        remapOrCopyRef(Include, Mappings, Saver);
      // Read the key before the node is moved into the map.
      llvm::StringRef Key = Node.URI;
      Rebuilt[Key] = std::move(Node);
    }
    Data.Sources = std::move(Rebuilt);
  }

  if (Data.Cmd) {
    remapStdStr(Data.Cmd->Directory, Mappings);
    for (auto &Arg : Data.Cmd->CommandLine)
      remapStdStr(Arg, Mappings);
    remapStdStr(Data.Cmd->Filename, Mappings);
  }
}
} // namespace
} // namespace clangd
} // namespace clang
// Entry point of clangd-remap.
//
// Parses the command line, collects every .idx shard under the index
// directory, and then, in parallel, reads each shard, remaps its paths, and
// writes it back (under a new filename when the remapped source path yields a
// different shard name, in which case the old file is removed).
//
// Returns 0 on success, or when no shards are found. Returns 1 if the path
// mappings are invalid or empty, or if any shard could not be read, parsed,
// written, or (after a rename) have its old file removed.
int main(int Argc, const char **Argv) {
  using namespace clang::clangd;
  llvm::sys::PrintStackTraceOnErrorSignal(Argv[0]);
  llvm::cl::HideUnrelatedOptions(RemapCategory);
  llvm::cl::ParseCommandLineOptions(Argc, Argv,
                                    "clangd-remap: rewrite paths inside "
                                    "background-index .idx shards\n");
  StreamLogger Logger(llvm::errs(), LogLevel);
  LoggingSession LoggingSession(Logger);

  auto Mappings = parsePathMappings(PathMappingsArg);
  if (!Mappings) {
    elog("Invalid --path-mappings: {0}", Mappings.takeError());
    return 1;
  }
  if (Mappings->empty()) {
    elog("No path mappings specified.");
    return 1;
  }

  // Gather all shard files from the index directory.
  auto AllShards = collectShards(IndexDir);
  if (AllShards.empty()) {
    log("No .idx files found in the specified directories.");
    return 0;
  }
  log("Found {0} shard(s) to process.", AllShards.size());
  for (const auto &M : *Mappings)
    log(" Path mapping: {0}", M);

  // Only override the default parallel strategy when -j was given.
  if (NumThreads.getValue() != 0)
    llvm::parallel::strategy = llvm::hardware_concurrency(NumThreads);

  // Each shard ends up in exactly one of these buckets; the counters are
  // atomic because shards are processed concurrently.
  std::atomic<unsigned> Errors{0};
  std::atomic<unsigned> FilesRenamed{0};
  std::atomic<unsigned> FilesUnchanged{0};
  llvm::parallelFor(0, AllShards.size(), [&](size_t I) {
    const std::string &ShardPath = AllShards[I];
    auto Buf = llvm::MemoryBuffer::getFile(ShardPath);
    if (!Buf) {
      elog("Cannot read {0}: {1}", ShardPath, Buf.getError().message());
      ++Errors;
      return;
    }
    auto Parsed = readIndexFile((*Buf)->getBuffer(), SymbolOrigin::Background);
    if (!Parsed) {
      elog("Cannot parse {0}: {1}", ShardPath, Parsed.takeError());
      ++Errors;
      return;
    }

    // Derive the new shard filename before remapping, so we can match
    // against original (un-remapped) source URIs.
    llvm::StringRef OldFilename = llvm::sys::path::filename(ShardPath);
    std::string NewFilename =
        deriveNewFilename(*Parsed, OldFilename, *Mappings);

    // Remap all paths in the parsed data. The arena backs the strings that
    // remapIndexData() stores as StringRef/raw pointers, so it must stay
    // alive until the shard has been written.
    llvm::BumpPtrAllocator Arena;
    llvm::StringSaver Saver(Arena);
    remapIndexData(*Parsed, *Mappings, Saver);

    // Write the remapped shard (possibly under a new name)
    llvm::StringRef ParentDir = llvm::sys::path::parent_path(ShardPath);
    llvm::SmallString<256> NewPath(ParentDir);
    llvm::sys::path::append(NewPath, NewFilename);
    if (auto Err = llvm::writeToOutput(NewPath, [&](llvm::raw_ostream &OS) {
          IndexFileOut Out(*Parsed);
          Out.Format = IndexFileFormat::RIFF;
          OS << Out;
          return llvm::Error::success();
        })) {
      elog("Cannot write {0}: {1}", NewPath, std::move(Err));
      ++Errors;
      return;
    }

    // If the filename changed, remove the old shard. A failed removal leaves
    // a stale shard next to the new one, so report it instead of silently
    // counting the shard as renamed.
    if (NewFilename != OldFilename) {
      if (std::error_code EC = llvm::sys::fs::remove(ShardPath)) {
        elog("Cannot remove {0}: {1}", ShardPath, EC.message());
        ++Errors;
        return;
      }
      ++FilesRenamed;
    } else
      ++FilesUnchanged;
  });

  unsigned Renamed = FilesRenamed.load();
  unsigned Unchanged = FilesUnchanged.load();
  log("Processed: {0} shard(s), {1} renamed, {2} unchanged, {3} error(s).",
      Renamed + Unchanged, Renamed, Unchanged, Errors.load());
  return Errors.load() > 0 ? 1 : 0;
}

View File

@@ -2,6 +2,7 @@ set(CLANGD_TEST_DEPS
clangd
ClangdTests
clangd-indexer
clangd-remap
split-file
)

View File

@@ -0,0 +1,18 @@
# Paths are not constructed correctly for the test to run on Windows
# UNSUPPORTED: system-windows
#
# RUN: rm -rf %t && mkdir -p %t/src %t/index
# RUN: echo 'void hello();' > %t/src/test.h
# RUN: printf '#include "test.h"\nvoid hello() {}\n' > %t/src/test.cpp
# RUN: clangd-indexer %t/src/test.cpp -- -I%t/src > %t/index/test.idx
#
# Remap %t/src -> /remapped/path inside the shard
# RUN: clangd-remap --path-mappings '%t/src=/remapped/path' %t/index 2>&1 \
# RUN: | FileCheck -check-prefix=REMAP %s
# REMAP: Found 1 shard(s) to process
# REMAP: Processed: 1 shard(s), 0 renamed, 1 unchanged, 0 error(s)
#
# Verify the remapped path appears in the symbol data
# RUN: dexp %t/index/test.idx -c="lookup -name hello" \
# RUN: | FileCheck -check-prefix=LOOKUP %s
# LOOKUP: file:///remapped/path/test.h