[clang-doc] Support deep copy between arenas for merging (#190055)

Upcoming changes to the merge step will require us to clear the
transient arenas and merge new items into the persistent arena. However,
that is challenging because the existing types are typically not meant
to be copied. We introduce new arena-aware copy APIs to simplify that
task and ensure we don't accidentally leak memory.

On the performance front, we reclaim about 2.3% of the run time, bringing
the cumulative time overhead from the series of patches down to about 7.7%
over the baseline.

| Metric | Baseline | Prev | This | Culm% | Seq% |
| :--- | :--- | :--- | :--- | :--- | :--- |
| Time | 920.5s | 1014.5s | 991.5s | +7.7% | -2.3% |
| Memory | 86.0G | 39.9G | 40.0G | -53.4% | +0.3% |

| Benchmark | Baseline | Prev | This | Culm% | Seq% |
| :--- | :--- | :--- | :--- | :--- | :--- |
| BM_BitcodeReader_Scale/10 | 67.9us | 73.0us | 72.2us | +6.3% | -1.1% |
| BM_BitcodeReader_Scale/10000 | 70.5ms | 17.1ms | 22.5ms | -68.1% | +31.6% |
| BM_BitcodeReader_Scale/4096 | 23.2ms | 6.5ms | 6.6ms | -71.6% | +1.1% |
| BM_BitcodeReader_Scale/512 | 509.4us | 850.1us | 898.7us | +76.4% | +5.7% |
| BM_BitcodeReader_Scale/64 | 114.8us | 140.6us | 133.7us | +16.4% | -5.0% |
| BM_EmitInfoFunction | 1.6us | 1.8us | 1.9us | +16.8% | +3.4% |
| BM_Index_Insertion/10 | 2.3us | 4.1us | 4.1us | +78.7% | -0.5% |
| BM_Index_Insertion/10000 | 3.1ms | 5.4ms | 5.3ms | +70.5% | -0.4% |
| BM_Index_Insertion/4096 | 1.3ms | 2.1ms | 2.1ms | +66.0% | +0.7% |
| BM_Index_Insertion/512 | 153.6us | 252.9us | 251.8us | +64.0% | -0.4% |
| BM_Index_Insertion/64 | 18.1us | 30.4us | 30.2us | +67.4% | -0.5% |
| BM_JSONGenerator_Scale/10 | 36.8us | 38.1us | 37.1us | +0.6% | -2.8% |
| BM_JSONGenerator_Scale/10000 | 89.6ms | 84.2ms | 81.4ms | -9.1% | -3.3% |
| BM_JSONGenerator_Scale/4096 | 33.7ms | 32.1ms | 31.0ms | -8.1% | -3.6% |
| BM_JSONGenerator_Scale/512 | 1.9ms | 2.0ms | 1.9ms | -0.1% | -3.1% |
| BM_JSONGenerator_Scale/64 | 222.4us | 232.5us | 222.9us | +0.2% | -4.1% |
| BM_Mapper_Scale/10 | 2.5ms | 2.5ms | 2.5ms | -1.0% | -0.4% |
| BM_Mapper_Scale/10000 | 104.3ms | 102.1ms | 112.3ms | +7.7% | +10.0% |
| BM_Mapper_Scale/4096 | 44.3ms | 46.5ms | 45.0ms | +1.4% | -3.4% |
| BM_Mapper_Scale/512 | 7.6ms | 7.4ms | 7.7ms | +1.1% | +3.3% |
| BM_Mapper_Scale/64 | 3.1ms | 3.0ms | 3.0ms | -1.5% | -0.2% |
| BM_MergeInfos_Scale/10000 | 12.2ms | 931.2us | 575.6us | -95.3% | -38.2% |
| BM_MergeInfos_Scale/2 | 1.9us | 1.8us | 1.8us | -2.7% | +1.9% |
| BM_MergeInfos_Scale/4096 | 2.8ms | 201.3us | 205.3us | -92.6% | +1.9% |
| BM_MergeInfos_Scale/512 | 68.9us | 19.9us | 20.5us | -70.2% | +3.4% |
| BM_MergeInfos_Scale/64 | 10.3us | 4.1us | 3.8us | -62.6% | -7.1% |
| BM_MergeInfos_Scale/8 | 2.8us | 1.9us | 1.9us | -30.5% | +4.2% |
| BM_SerializeFunctionInfo | 25.5us | 26.2us | 25.8us | +0.9% | -1.5% |
This commit is contained in:
Paul Kirth
2026-04-10 14:55:49 -07:00
committed by GitHub
parent 93c10f0820
commit c70dae8b0c
2 changed files with 170 additions and 21 deletions

View File

@@ -115,7 +115,8 @@ static void reduceChildren(llvm::simple_ilist<T> &Children,
auto It = llvm::find_if(
Children, [&](const T &C) { return C.USR == ChildToMerge->USR; });
if (It == Children.end()) {
Children.push_back(*ChildToMerge);
T *NewChild = allocatePtr<T>(PersistentArena, std::move(*ChildToMerge));
Children.push_back(*NewChild);
} else {
It->merge(std::move(*ChildToMerge));
}
@@ -124,11 +125,14 @@ static void reduceChildren(llvm::simple_ilist<T> &Children,
// Appends to Target every item of Source that has no equal element in Target.
// Source is drained from the front until it is empty; "equal" means
// operator== between a Target element and the drained item returns true.
// NOTE(review): the two back-to-back none_of checks below look like the
// before and after forms of a single statement taken from a diff (hunk
// markers surround this function). Presumably only the second form, which
// goes through allocatePtr with PersistentArena, is meant to remain --
// confirm against the real file.
template <typename Container>
static void mergeUnkeyed(Container &Target, Container &&Source) {
using T = typename Container::value_type;
while (!Source.empty()) {
// Bind the reference before pop_front(): the item is still read after it
// has been removed from Source.
auto &Item = Source.front();
Source.pop_front();
if (llvm::none_of(Target, [&](const auto &E) { return E == Item; }))
Target.push_back(Item);
if (llvm::none_of(Target, [&](const auto &E) { return E == Item; })) {
// Presumably constructs a T in PersistentArena from the moved item; the
// resulting object, not Item itself, is what gets linked into Target.
T *NewItem = allocatePtr<T>(PersistentArena, std::move(Item));
Target.push_back(*NewItem);
}
}
}
@@ -162,6 +166,20 @@ llvm::Expected<OwnedPtr<Info>> mergeInfos(OwningPtrArray<Info> &Values) {
llvm_unreachable("unhandled enumerator");
}
// Arena-aware copy: takes SpecializationOf from Other by value and obtains the
// parameter array from allocateArray() on Arena.
TemplateSpecializationInfo::TemplateSpecializationInfo(
    const TemplateSpecializationInfo &Other, llvm::BumpPtrAllocator &Arena)
    : SpecializationOf(Other.SpecializationOf),
      Params(allocateArray(Other.Params, Arena)) {}
// Arena-aware copy of Other: Params and Constraints come from allocateArray()
// on Arena, and the specialization (when Other has one) is rebuilt through
// TemplateSpecializationInfo's own arena-aware constructor.
TemplateInfo::TemplateInfo(const TemplateInfo &Other,
                           llvm::BumpPtrAllocator &Arena) {
  Params = allocateArray(Other.Params, Arena);
  // Construct the specialization in place instead of assigning a temporary.
  if (Other.Specialization)
    Specialization.emplace(*Other.Specialization, Arena);
  Constraints = allocateArray(Other.Constraints, Arena);
}
bool CommentInfo::operator==(const CommentInfo &Other) const {
auto FirstCI = std::tie(Kind, Text, Name, Direction, ParamName, CloseName,
SelfClosing, Explicit, AttrKeys, AttrValues, Args);
@@ -197,6 +215,28 @@ bool CommentInfo::operator<(const CommentInfo &Other) const {
return false;
}
// Arena-aware copy constructor.
//
// Scalar and string members are copied by value from Other. AttrKeys,
// AttrValues and Args are obtained from allocateArray() on Arena. Children
// are CommentInfo objects themselves, so they go through deepCopyArray(),
// which rebuilds every child with this same constructor (recursively) in
// Arena.
//
// No base-class initializer is given, so the ilist_node base is
// default-initialized rather than copied from Other.
CommentInfo::CommentInfo(const CommentInfo &Other,
                         llvm::BumpPtrAllocator &Arena) {
  Kind = Other.Kind;
  Direction = Other.Direction;
  Name = Other.Name;
  ParamName = Other.ParamName;
  CloseName = Other.CloseName;
  SelfClosing = Other.SelfClosing;
  Explicit = Other.Explicit;
  Text = Other.Text;
  AttrKeys = allocateArray(Other.AttrKeys, Arena);
  AttrValues = allocateArray(Other.AttrValues, Arena);
  Args = allocateArray(Other.Args, Arena);
  // deepCopyArray() returns an empty ArrayRef for empty input, so no
  // emptiness check is needed here.
  Children = deepCopyArray<CommentInfo>(Other.Children, Arena);
}
static llvm::SmallString<64>
calculateRelativeFilePath(const InfoType &Type, const StringRef &Path,
const StringRef &Name, const StringRef &CurrentPath) {
@@ -267,6 +307,31 @@ void FriendInfo::merge(FriendInfo &&Other) {
SymbolInfo::merge(std::move(Other));
}
// Arena-aware copy of a FriendInfo. The SymbolInfo base is copied through its
// own arena-aware constructor; the members declared on FriendInfo are handled
// below.
FriendInfo::FriendInfo(const FriendInfo &Other, llvm::BumpPtrAllocator &Arena)
    : SymbolInfo(Other, Arena) {
  // Members copied by plain assignment.
  Ref = Other.Ref;
  IsClass = Other.IsClass;

  // Optional / array members are only touched when Other actually has them.
  if (Other.Template)
    Template.emplace(*Other.Template, Arena);
  if (Other.ReturnType)
    ReturnType = Other.ReturnType;
  if (!Other.Params.empty())
    Params = allocateArray(Other.Params, Arena);
}
// Arena-aware copy of the common Info state. Path, Name,
// DocumentationFileName, USR, ParentUSR and IT are copied by value; Namespace
// comes from allocateArray() on Arena; every Description comment is rebuilt
// via CommentInfo's arena-aware constructor and appended in source order.
Info::Info(const Info &Other, llvm::BumpPtrAllocator &Arena)
    : Path(Other.Path), Name(Other.Name),
      DocumentationFileName(Other.DocumentationFileName), USR(Other.USR),
      ParentUSR(Other.ParentUSR), IT(Other.IT) {
  Namespace = allocateArray(Other.Namespace, Arena);
  // Iterating an empty list does nothing, so no separate emptiness test.
  for (const auto &SourceComment : Other.Description) {
    CommentInfo *Copied =
        allocatePtr<CommentInfo>(Arena, SourceComment, Arena);
    Description.push_back(*Copied);
  }
}
void Info::mergeBase(Info &&Other) {
assert(mergeable(Other));
if (USR == EmptySID)
@@ -289,6 +354,17 @@ bool Info::mergeable(const Info &Other) {
return IT == Other.IT && USR == Other.USR;
}
// Arena-aware copy of a SymbolInfo. The Info base goes through its own
// arena-aware constructor; DefLoc, MangledName and IsStatic are copied by
// value; each Location in Loc is re-created through allocatePtr() with Arena
// and appended in source order.
SymbolInfo::SymbolInfo(const SymbolInfo &Other, llvm::BumpPtrAllocator &Arena)
    : Info(Other, Arena), DefLoc(Other.DefLoc), MangledName(Other.MangledName),
      IsStatic(Other.IsStatic) {
  // Iterating an empty list does nothing, so no separate emptiness test.
  for (const auto &SourceLoc : Other.Loc) {
    Location *Copied = allocatePtr<Location>(Arena, SourceLoc);
    Loc.push_back(*Copied);
  }
}
void SymbolInfo::merge(SymbolInfo &&Other) {
assert(mergeable(Other));
if (!DefLoc)
@@ -319,21 +395,47 @@ void NamespaceInfo::merge(NamespaceInfo &&Other) {
RecordInfo::RecordInfo(SymbolID USR, StringRef Name, StringRef Path)
: SymbolInfo(InfoType::IT_record, USR, Name, Path) {}
// FIXME: This constructor is currently unsafe for cross-arena copies of
// populated records. A default copy of ScopeChildren would shallow-copy the
// intrusive pointers, which leads to a use-after-free once the TransientArena
// is reset. Subsequent patches will address this by deep-copying children
// individually via reduceChildren.
// NOTE(review): as written, neither Children nor Template is copied from
// Other here -- confirm that is intended until the follow-up lands.
RecordInfo::RecordInfo(const RecordInfo &Other, llvm::BumpPtrAllocator &Arena)
    : SymbolInfo(Other, Arena), TagType(Other.TagType),
      IsTypeDef(Other.IsTypeDef) {
  // Arrays whose elements are rebuilt one by one through their own
  // arena-aware constructors.
  Members = deepCopyArray(Other.Members, Arena);
  Bases = deepCopyArray(Other.Bases, Arena);
  Friends = deepCopyArray(Other.Friends, Arena);

  // Arrays obtained from allocateArray().
  Parents = allocateArray(Other.Parents, Arena);
  VirtualParents = allocateArray(Other.VirtualParents, Arena);
}
// Arena-aware copy of a MemberTypeInfo. The FieldTypeInfo base uses its
// ordinary copy constructor; Access and IsStatic are copied by value; every
// Description comment is rebuilt via CommentInfo's arena-aware constructor
// and appended in source order.
MemberTypeInfo::MemberTypeInfo(const MemberTypeInfo &Other,
                               llvm::BumpPtrAllocator &Arena)
    : FieldTypeInfo(Other), Access(Other.Access), IsStatic(Other.IsStatic) {
  // Iterating an empty list does nothing, so no separate emptiness test.
  for (const auto &SourceComment : Other.Description) {
    CommentInfo *Copied =
        allocatePtr<CommentInfo>(Arena, SourceComment, Arena);
    Description.push_back(*Copied);
  }
}
void RecordInfo::merge(RecordInfo &&Other) {
assert(mergeable(Other));
if (!llvm::to_underlying(TagType))
TagType = Other.TagType;
IsTypeDef = IsTypeDef || Other.IsTypeDef;
if (Members.empty())
Members = std::move(Other.Members);
if (Bases.empty())
Bases = std::move(Other.Bases);
if (Parents.empty())
Parents = std::move(Other.Parents);
if (VirtualParents.empty())
VirtualParents = std::move(Other.VirtualParents);
if (Friends.empty())
Friends = std::move(Other.Friends);
if (Members.empty() && !Other.Members.empty())
Members = deepCopyArray(Other.Members, PersistentArena);
if (Bases.empty() && !Other.Bases.empty())
Bases = deepCopyArray(Other.Bases, PersistentArena);
if (Parents.empty() && !Other.Parents.empty())
Parents = allocateArray(Other.Parents, PersistentArena);
if (VirtualParents.empty() && !Other.VirtualParents.empty())
VirtualParents = allocateArray(Other.VirtualParents, PersistentArena);
if (Friends.empty() && !Other.Friends.empty())
Friends = deepCopyArray(Other.Friends, PersistentArena);
// Reduce children if necessary.
reduceChildren(Children.Records, std::move(Other.Children.Records));
reduceChildren(Children.Functions, std::move(Other.Children.Functions));
@@ -344,12 +446,23 @@ void RecordInfo::merge(RecordInfo &&Other) {
Template = Other.Template;
}
// Arena-aware copy of an EnumValueInfo. Name, Value and ValueExpr are copied
// by value; every Description comment is rebuilt via CommentInfo's
// arena-aware constructor and appended in source order.
EnumValueInfo::EnumValueInfo(const EnumValueInfo &Other,
                             llvm::BumpPtrAllocator &Arena)
    : Name(Other.Name), Value(Other.Value), ValueExpr(Other.ValueExpr) {
  // Iterating an empty list does nothing, so no separate emptiness test.
  for (const auto &SourceComment : Other.Description) {
    CommentInfo *Copied =
        allocatePtr<CommentInfo>(Arena, SourceComment, Arena);
    Description.push_back(*Copied);
  }
}
// Merges Other into this enum. Other must be mergeable with this object
// (asserted). Scoped is taken from Other only when it is not already set
// here, Members are filled in only when this enum has none, and the
// remaining state is handled by SymbolInfo::merge.
// NOTE(review): the two consecutive Members assignments below look like the
// before and after forms of a single statement taken from a diff (hunk
// markers surround this function). Presumably only the deepCopyArray form
// targeting PersistentArena is meant to remain -- confirm against the real
// file.
void EnumInfo::merge(EnumInfo &&Other) {
assert(mergeable(Other));
if (!Scoped)
Scoped = Other.Scoped;
if (Members.empty())
Members = std::move(Other.Members);
if (Members.empty() && !Other.Members.empty())
Members = deepCopyArray(Other.Members, PersistentArena);
SymbolInfo::merge(std::move(Other));
}
@@ -363,8 +476,8 @@ void FunctionInfo::merge(FunctionInfo &&Other) {
ReturnType = std::move(Other.ReturnType);
if (Parent.USR == EmptySID && Parent.Name == "")
Parent = std::move(Other.Parent);
if (Params.empty())
Params = std::move(Other.Params);
if (Params.empty() && !Other.Params.empty())
Params = allocateArray(Other.Params, PersistentArena);
SymbolInfo::merge(std::move(Other));
if (!Template)
Template = Other.Template;
@@ -387,10 +500,11 @@ void ConceptInfo::merge(ConceptInfo &&Other) {
IsType = Other.IsType;
if (ConstraintExpression.empty())
ConstraintExpression = std::move(Other.ConstraintExpression);
if (Template.Constraints.empty())
Template.Constraints = std::move(Other.Template.Constraints);
if (Template.Params.empty())
Template.Params = std::move(Other.Template.Params);
if (Template.Constraints.empty() && !Other.Template.Constraints.empty())
Template.Constraints =
allocateArray(Other.Template.Constraints, PersistentArena);
if (Template.Params.empty() && !Other.Template.Params.empty())
Template.Params = allocateArray(Other.Template.Params, PersistentArena);
SymbolInfo::merge(std::move(Other));
}
@@ -405,6 +519,11 @@ void VarInfo::merge(VarInfo &&Other) {
BaseRecordInfo::BaseRecordInfo() : RecordInfo() {}
BaseRecordInfo::BaseRecordInfo(const BaseRecordInfo &Other,
llvm::BumpPtrAllocator &Arena)
: RecordInfo(Other, Arena), Access(Other.Access),
IsVirtual(Other.IsVirtual), IsParent(Other.IsParent) {}
BaseRecordInfo::BaseRecordInfo(SymbolID USR, StringRef Name, StringRef Path,
bool IsVirtual, AccessSpecifier Access,
bool IsParent)

View File

@@ -91,6 +91,18 @@ llvm::ArrayRef<T> allocateArray(llvm::ArrayRef<T> V,
return llvm::ArrayRef<T>(Allocated, V.size());
}
// Copies the elements of V into storage obtained from Alloc, constructing
// each new element with T's two-argument constructor T(const T &, Alloc) so
// that the element can in turn place whatever it copies into the same
// allocator.
//
// Returns an ArrayRef over the new storage with the same length as V, or a
// default-constructed (empty) ArrayRef when V is empty, in which case nothing
// is allocated.
template <typename T>
llvm::ArrayRef<T> deepCopyArray(llvm::ArrayRef<T> V,
                                llvm::BumpPtrAllocator &Alloc) {
  if (V.empty())
    return llvm::ArrayRef<T>();
  // Allocate<T>() already yields a T *, so no cast is required.
  T *Allocated = Alloc.Allocate<T>(V.size());
  T *Slot = Allocated;
  for (const T &Elem : V)
    new (Slot++) T(Elem, Alloc);
  return llvm::ArrayRef<T>(Allocated, V.size());
}
// An abstraction for owned pointers. Initially mapped to OwnedPtr,
// to be eventually transitioned to bare pointers in an arena.
template <typename T> using OwnedPtr = T *;
@@ -180,6 +192,7 @@ struct CommentInfo : public llvm::ilist_node<CommentInfo> {
CommentInfo() = default;
CommentInfo(const CommentInfo &Other) = default;
CommentInfo &operator=(const CommentInfo &Other) = default;
CommentInfo(const CommentInfo &Other, llvm::BumpPtrAllocator &Arena);
CommentInfo(CommentInfo &&Other) = default;
CommentInfo &operator=(CommentInfo &&Other) = default;
@@ -349,6 +362,10 @@ struct TemplateParamInfo {
};
struct TemplateSpecializationInfo {
TemplateSpecializationInfo() = default;
TemplateSpecializationInfo(const TemplateSpecializationInfo &Other,
llvm::BumpPtrAllocator &Arena);
// Indicates the declaration that this specializes.
SymbolID SpecializationOf;
@@ -368,6 +385,9 @@ struct ConstraintInfo {
// Records the template information for a struct or function that is a template
// or an explicit template specialization.
struct TemplateInfo {
TemplateInfo() = default;
TemplateInfo(const TemplateInfo &Other, llvm::BumpPtrAllocator &Arena);
// May be empty for non-partial specializations.
llvm::ArrayRef<TemplateParamInfo> Params;
@@ -399,6 +419,7 @@ struct FieldTypeInfo : public TypeInfo {
// Info for member types.
struct MemberTypeInfo : public FieldTypeInfo {
MemberTypeInfo() = default;
MemberTypeInfo(const MemberTypeInfo &Other, llvm::BumpPtrAllocator &Arena);
MemberTypeInfo(const TypeInfo &TI, StringRef Name, AccessSpecifier Access,
bool IsStatic = false)
: FieldTypeInfo(TI, Name), Access(Access), IsStatic(IsStatic) {}
@@ -455,6 +476,7 @@ struct Info {
StringRef Name = StringRef(), StringRef Path = StringRef())
: Path(internString(Path)), Name(internString(Name)), USR(USR), IT(IT) {}
Info(const Info &Other, llvm::BumpPtrAllocator &Arena);
Info(const Info &Other) = delete;
Info(Info &&Other) = default;
@@ -517,6 +539,8 @@ struct SymbolInfo : public Info {
StringRef Name = StringRef(), StringRef Path = StringRef())
: Info(IT, USR, Name, Path) {}
SymbolInfo(const SymbolInfo &Other, llvm::BumpPtrAllocator &Arena);
void merge(SymbolInfo &&I);
bool operator<(const SymbolInfo &Other) const {
@@ -546,6 +570,7 @@ struct FriendInfo : public SymbolInfo, public llvm::ilist_node<FriendInfo> {
FriendInfo(const InfoType IT, const SymbolID &USR,
const StringRef Name = StringRef())
: SymbolInfo(IT, USR, Name) {}
FriendInfo(const FriendInfo &Other, llvm::BumpPtrAllocator &Arena);
bool mergeable(const FriendInfo &Other);
void merge(FriendInfo &&Other);
@@ -597,6 +622,8 @@ struct RecordInfo : public SymbolInfo {
RecordInfo(SymbolID USR = SymbolID(), StringRef Name = StringRef(),
StringRef Path = StringRef());
RecordInfo(const RecordInfo &Other, llvm::BumpPtrAllocator &Arena);
void merge(RecordInfo &&I);
// Type of this record (struct, class, union, interface).
@@ -652,6 +679,7 @@ struct TypedefInfo : public SymbolInfo, public llvm::ilist_node<TypedefInfo> {
struct BaseRecordInfo : public RecordInfo,
public llvm::ilist_node<BaseRecordInfo> {
BaseRecordInfo();
BaseRecordInfo(const BaseRecordInfo &Other, llvm::BumpPtrAllocator &Arena);
BaseRecordInfo(SymbolID USR, StringRef Name, StringRef Path, bool IsVirtual,
AccessSpecifier Access, bool IsParent);
@@ -671,6 +699,8 @@ struct EnumValueInfo {
: Name(internString(Name)), Value(internString(Value)),
ValueExpr(internString(ValueExpr)) {}
EnumValueInfo(const EnumValueInfo &Other, llvm::BumpPtrAllocator &Arena);
bool operator==(const EnumValueInfo &Other) const {
return std::tie(Name, Value, ValueExpr) ==
std::tie(Other.Name, Other.Value, Other.ValueExpr);