In a PR last month I changed the ObjectFile CreateInstance (etc.) methods to accept an optional DataExtractorSP instead of a DataBufferSP, and to retain the extractor in a shared pointer internally in all of the ObjectFile subclasses. This lays the groundwork for using a VirtualDataExtractor for some Mach-O binaries on macOS, where the segments of the binary are out of order in actual memory and we add a lookup table to make it appear that the TEXT segment is at offset 0 in the extractor, etc. While working on the actual implementation, I realized we were still using DataBufferSPs in ModuleSpec and Module, as well as in ObjectFile::GetModuleSpecifications. I was originally making a much larger NFC change in which all ObjectFile subclasses operated on DataExtractors throughout their implementation, as did the DWARF parser. It was a very large patchset. Many subclasses start with their DataExtractor, then create smaller DataExtractors for parts of the binary image - the string table, the symbol table, etc. - for processing. After consideration and discussion with Jonas, we agreed that a segment/section of a binary will never require a lookup table to access the bytes within it, so I changed VirtualDataExtractor::GetSubsetExtractorSP to (1) require that the subset be contained within a single lookup table entry, and (2) return a simple DataExtractor bounded to that byte range. By doing this, I was able to remove all of my very invasive changes to the ObjectFile subclass internals; care is needed only when they operate on the entire binary image. One pattern that subclasses like ObjectFileBreakpad use is to take an ArrayRef of the DataBuffer for a binary, create a StringRef from it, and then look for strings in it. With a VirtualDataExtractor and out-of-order binary segments with gaps between them, this lets us search the entire buffer looking for a string and segfault when the search reaches an unmapped region of the buffer.
I added a VirtualDataExtractor::GetSubsetExtractorSP(0), which gets the largest contiguous memory region starting at offset 0 for this use case, and I added a comment about what is being done there because I know it is not obvious, and people not working on macOS wouldn't be familiar with the requirement. (When we have a ModuleSpec with a DataExtractor, every ObjectFile subclass gets a shot at creating an instance, so they all have to be able to iterate over these.) rdar://148939795
188 lines
6.9 KiB
C++
188 lines
6.9 KiB
C++
//===-- ObjectFileBreakpad.cpp --------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h"
|
|
#include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h"
|
|
#include "lldb/Core/ModuleSpec.h"
|
|
#include "lldb/Core/PluginManager.h"
|
|
#include "lldb/Core/Section.h"
|
|
#include <optional>
|
|
|
|
using namespace lldb;
|
|
using namespace lldb_private;
|
|
using namespace lldb_private::breakpad;
|
|
|
|
// Plugin definition macro for ObjectFileBreakpad.
// NOTE(review): presumably this generates the plugin's initialize/terminate
// entry points that call Initialize()/Terminate() below -- confirm against the
// macro definition, which is not visible in this file.
LLDB_PLUGIN_DEFINE(ObjectFileBreakpad)
|
|
|
|
namespace {
|
|
struct Header {
|
|
ArchSpec arch;
|
|
UUID uuid;
|
|
static std::optional<Header> parse(llvm::StringRef text);
|
|
};
|
|
} // namespace
|
|
|
|
/// Parse the header from the first two lines of \p text.
///
/// The first line must parse as a ModuleRecord; it supplies the architecture,
/// the OS and a fallback ID. The second line is tried as an InfoRecord; when
/// it parses and carries a valid ID, that ID takes precedence over the one
/// from the module record.
///
/// \return The parsed header, or std::nullopt if the first line is not a
/// valid module record.
std::optional<Header> Header::parse(llvm::StringRef text) {
  const auto [module_line, after_module] = text.split('\n');
  auto module_record = ModuleRecord::parse(module_line);
  if (!module_record)
    return std::nullopt;

  llvm::Triple triple;
  triple.setArch(module_record->Arch);
  triple.setOS(module_record->OS);

  // Only the line directly following the module record is considered for the
  // info record; anything after it is ignored here.
  const llvm::StringRef info_line = after_module.split('\n').first;
  auto info_record = InfoRecord::parse(info_line);

  const bool info_has_id = info_record && info_record->ID;
  UUID uuid = info_has_id ? info_record->ID : module_record->ID;
  return Header{ArchSpec(triple), std::move(uuid)};
}
|
|
|
|
// Out-of-line definition of the static ObjectFileBreakpad::ID member.
// NOTE(review): presumably its address serves as the class identifier for
// LLVM-style RTTI -- confirm against ObjectFileBreakpad.h.
char ObjectFileBreakpad::ID;
|
|
|
|
/// Register this plugin with the PluginManager, handing over its name,
/// description and the three static entry points (CreateInstance,
/// CreateMemoryInstance and GetModuleSpecifications).
void ObjectFileBreakpad::Initialize() {
  PluginManager::RegisterPlugin(
      GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
      CreateMemoryInstance, GetModuleSpecifications);
}
|
|
|
|
/// Unregister this plugin from the PluginManager, identifying it by its
/// CreateInstance callback.
void ObjectFileBreakpad::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
|
|
|
|
/// Try to create an ObjectFileBreakpad for the given module.
///
/// If \p extractor_sp is null or has no data, the file is first mapped with
/// MapFileData() and wrapped in a fresh DataExtractor. The beginning of the
/// data is then handed to Header::parse(); if no header can be parsed the
/// data is not a breakpad file and nullptr is returned.
///
/// \param module_sp     Module the new object file belongs to.
/// \param extractor_sp  Data for the file; may be null or empty.
/// \param data_offset   Offset into \p extractor_sp; reset to 0 whenever the
///                      data is (re)mapped from \p file.
/// \param file          File to map when more data is needed.
/// \param file_offset   Offset within \p file at which to map.
/// \param length        Number of bytes the object file is expected to span.
///
/// \return A new ObjectFileBreakpad allocated with `new`, or nullptr if the
/// file could not be mapped or its header could not be parsed.
///
/// NOTE(review): \p file is dereferenced without a null check whenever the
/// data has to be mapped -- callers are assumed to pass a valid pointer.
ObjectFile *ObjectFileBreakpad::CreateInstance(const ModuleSP &module_sp,
                                               DataExtractorSP extractor_sp,
                                               offset_t data_offset,
                                               const FileSpec *file,
                                               offset_t file_offset,
                                               offset_t length) {
  if (!extractor_sp || !extractor_sp->HasData()) {
    DataBufferSP data_sp = MapFileData(*file, length, file_offset);
    if (!data_sp)
      return nullptr;
    extractor_sp = std::make_shared<DataExtractor>(data_sp);
    data_offset = 0;
  }

  // If this is operating on a VirtualDataExtractor, there can be gaps between
  // the valid bytes in the underlying DataBuffer. The code below views the
  // raw bytes as one StringRef, and reading across such a gap can segfault,
  // so only work on the contiguous extractor.
  DataExtractorSP contiguous_extractor_sp =
      extractor_sp->GetContiguousDataExtractorSP();
  auto text = toStringRef(contiguous_extractor_sp->GetData());
  std::optional<Header> header = Header::parse(text);
  if (!header)
    return nullptr;

  // Update the data to contain the entire file if it doesn't already.
  if (contiguous_extractor_sp->GetByteSize() < length) {
    // Map the file exactly once; a second identical MapFileData() call would
    // only discard the first mapping.
    DataBufferSP data_sp = MapFileData(*file, length, file_offset);
    if (!data_sp)
      return nullptr;
    contiguous_extractor_sp = std::make_shared<DataExtractor>(data_sp);
    data_offset = 0;
  }

  return new ObjectFileBreakpad(
      module_sp, contiguous_extractor_sp, data_offset, file, file_offset,
      length, std::move(header->arch), std::move(header->uuid));
}
|
|
|
|
/// Memory-instance factory registered with the PluginManager.
///
/// This plugin never creates an object file from process memory: all
/// arguments are ignored and the function unconditionally returns nullptr.
ObjectFile *ObjectFileBreakpad::CreateMemoryInstance(
    const ModuleSP &module_sp, WritableDataBufferSP data_sp,
    const ProcessSP &process_sp, addr_t header_addr) {
  return nullptr;
}
|
|
|
|
size_t ObjectFileBreakpad::GetModuleSpecifications(
|
|
const FileSpec &file, DataExtractorSP &extractor_sp, offset_t data_offset,
|
|
offset_t file_offset, offset_t length, ModuleSpecList &specs) {
|
|
if (!extractor_sp || !extractor_sp->HasData())
|
|
return 0;
|
|
// If this is opearting on a VirtualDataExtractor, it can have
|
|
// gaps between valid bytes in the DataBuffer. We extract an
|
|
// ArrayRef of the raw bytes, and can segfault.
|
|
DataExtractorSP contiguous_extractor_sp =
|
|
extractor_sp->GetContiguousDataExtractorSP();
|
|
auto text = toStringRef(contiguous_extractor_sp->GetData());
|
|
std::optional<Header> header = Header::parse(text);
|
|
if (!header)
|
|
return 0;
|
|
ModuleSpec spec(file, std::move(header->arch));
|
|
spec.GetUUID() = std::move(header->uuid);
|
|
specs.Append(spec);
|
|
return 1;
|
|
}
|
|
|
|
/// Construct the object file from an already parsed header.
///
/// The module, file, offsets, length and extractor are forwarded to the
/// ObjectFile base class; \p arch and \p uuid are moved into the
/// corresponding members.
ObjectFileBreakpad::ObjectFileBreakpad(const ModuleSP &module_sp,
                                       DataExtractorSP extractor_sp,
                                       offset_t data_offset,
                                       const FileSpec *file, offset_t offset,
                                       offset_t length, ArchSpec arch,
                                       UUID uuid)
    : ObjectFile(module_sp, file, offset, length, extractor_sp, data_offset),
      m_arch(std::move(arch)),
      m_uuid(std::move(uuid)) {}
|
|
|
|
/// Report that the header is available.
///
/// \return Always true: the header was already parsed while the object was
/// being set up, so there is nothing left to do here.
bool ObjectFileBreakpad::ParseHeader() {
  return true;
}
|
|
|
|
/// Intentionally a no-op.
void ObjectFileBreakpad::ParseSymtab(Symtab &symtab) {
  // Breakpad files contribute nothing to the symbol table here. All of their
  // information is parsed as debug info, i.e. it ends up in
  // "lldb_private::Function" objects, or the symbol file adds symbols itself
  // through SymbolFileBreakpad::AddSymbols(...).
}
|
|
|
|
/// Split the file into sections, one per run of consecutive lines that
/// classify as the same record kind, and add each section both to this
/// object's own section list and to \p unified_section_list.
///
/// Does nothing if the sections have already been created.
void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
  if (m_sections_up)
    return;
  m_sections_up = std::make_unique<SectionList>();

  // Start of the file data; all section offsets are relative to it.
  const auto *data_begin = m_data_nsp->GetDataStart();

  std::optional<Record::Kind> active_kind;
  // Deliberately left unassigned: it is only read once active_kind holds a
  // value, and both are always assigned together at the end of the loop body.
  offset_t active_start;
  uint32_t next_id = 1;

  // Emit the section that is currently open (if any), ending at end_ptr.
  auto close_active_section = [&](const uint8_t *end_ptr) {
    if (!active_kind)
      return; // Nothing is open yet, or the open run has no known kind.

    const offset_t end_offset = end_ptr - data_begin;
    auto section_sp = std::make_shared<Section>(
        GetModule(), this, next_id++, ConstString(toString(*active_kind)),
        eSectionTypeOther,
        /*file_vm_addr*/ 0, /*vm_size*/ 0, active_start,
        end_offset - active_start, /*log2align*/ 0, /*flags*/ 0);
    m_sections_up->AddSection(section_sp);
    unified_section_list.AddSection(section_sp);
  };

  llvm::StringRef remaining = toStringRef(m_data_nsp->GetData());
  while (!remaining.empty()) {
    const auto pieces = remaining.split('\n');
    const llvm::StringRef line = pieces.first;
    remaining = pieces.second;

    std::optional<Record::Kind> kind = Record::classify(line);
    // Line/Inline records logically belong to the preceding Func record, so
    // they are kept in the same section.
    if (kind == Record::Line || kind == Record::Inline)
      kind = Record::Func;

    if (kind != active_kind) {
      // The kind changed: finish the previous section, if there was one, and
      // open a new one starting at this line.
      close_active_section(line.bytes_begin());
      active_kind = kind;
      active_start = line.bytes_begin() - data_begin;
    }
  }

  // Whatever is still open runs up to the end of the data.
  close_active_section(m_data_nsp->GetDataEnd());
}
|