In a PR last month I changed the ObjectFile CreateInstance etc methods to accept an optional DataExtractorSP instead of a DataBufferSP, and retain the extractor in a shared pointer internally in all of the ObjectFile subclasses. This is laying the groundwork for using a VirtualDataExtractor for some Mach-O binaries on macOS, where the segments of the binary are out-of-order in actual memory, and we add a lookup table to make it appear that the TEXT segment is at offset 0 in the Extractor, etc. Working on the actual implementation, I realized we were still using DataBufferSP's in ModuleSpec and Module, as well as in ObjectFile::GetModuleSpecifications. I originally was making a much larger NFC change where I had all ObjectFile subclasses operating on DataExtractors throughout their implementation, as well as in the DWARF parser. It was a very large patchset. Many subclasses start with their DataExtractor, then create smaller DataExtractors for parts of the binary image - the string table, the symbol table, etc., for processing. After consideration and discussion with Jonas, we agreed that a segment/section of a binary will never require a lookup table to access the bytes within it, so I changed VirtualDataExtractor::GetSubsetExtractorSP to (1) require that the Subset be contained within a single lookup table entry, and (2) return a simple DataExtractor bounded on that byte range. By doing this, I was able to remove all of my very-invasive changes to the ObjectFile subclass internals; it's only when they are operating on the entire binary image that care is needed. One pattern that subclasses like ObjectFileBreakpad use is to take an ArrayRef of the DataBuffer for a binary, then create a StringRef of that, then look for strings in it. With a VirtualDataExtractor and out-of-order binary segments, with gaps between them, this allows us to search the entire buffer looking for a string, and segfault when it gets to an unmapped region of the buffer. I added a VirtualDataExtractor::GetSubsetExtractorSP(0) which gets the largest contiguous memory region starting at offset 0 for this use case, and I added a comment about what was being done there because I know it is not obvious, and people not working on macOS wouldn't be familiar with the requirement. (when we have a ModuleSpec with a DataExtractor, any of the ObjectFile subclasses get a shot at Creating, so they all have to be able to iterate on these) rdar://148939795
327 lines
12 KiB
C++
327 lines
12 KiB
C++
//===-- ObjectFileCOFF.cpp ------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "ObjectFileCOFF.h"
|
|
|
|
#include "lldb/Core/Module.h"
|
|
#include "lldb/Core/ModuleSpec.h"
|
|
#include "lldb/Core/PluginManager.h"
|
|
#include "lldb/Utility/DataExtractor.h"
|
|
#include "lldb/Utility/LLDBLog.h"
|
|
|
|
#include "llvm/Support/Error.h"
|
|
#include "llvm/Support/FormatAdapters.h"
|
|
|
|
using namespace lldb;
|
|
using namespace lldb_private;
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::object;
|
|
|
|
static bool IsCOFFObjectFile(const llvm::ArrayRef<uint8_t> data) {
|
|
return identify_magic(toStringRef(data)) == file_magic::coff_object;
|
|
}
|
|
|
|
LLDB_PLUGIN_DEFINE(ObjectFileCOFF)
|
|
|
|
char ObjectFileCOFF::ID;
|
|
|
|
ObjectFileCOFF::~ObjectFileCOFF() = default;
|
|
|
|
void ObjectFileCOFF::Initialize() {
|
|
PluginManager::RegisterPlugin(GetPluginNameStatic(),
|
|
GetPluginDescriptionStatic(), CreateInstance,
|
|
CreateMemoryInstance, GetModuleSpecifications);
|
|
}
|
|
|
|
void ObjectFileCOFF::Terminate() {
|
|
PluginManager::UnregisterPlugin(CreateInstance);
|
|
}
|
|
|
|
lldb_private::ObjectFile *
|
|
ObjectFileCOFF::CreateInstance(const ModuleSP &module_sp,
|
|
DataExtractorSP extractor_sp,
|
|
offset_t data_offset, const FileSpec *file,
|
|
offset_t file_offset, offset_t length) {
|
|
Log *log = GetLog(LLDBLog::Object);
|
|
|
|
if (!extractor_sp || !extractor_sp->HasData()) {
|
|
DataBufferSP data_sp = MapFileData(*file, length, file_offset);
|
|
if (!data_sp) {
|
|
LLDB_LOG(log,
|
|
"Failed to create ObjectFileCOFF instance: cannot read file {0}",
|
|
file->GetPath());
|
|
return nullptr;
|
|
}
|
|
extractor_sp = std::make_shared<lldb_private::DataExtractor>(data_sp);
|
|
data_offset = 0;
|
|
}
|
|
|
|
assert(extractor_sp && extractor_sp->HasData() &&
|
|
"must have mapped file at this point");
|
|
|
|
// If this is operating on a VirtualDataExtractor, it can have
|
|
// gaps between valid bytes in the DataBuffer. We extract an
|
|
// ArrayRef of the raw bytes, and can segfault.
|
|
DataExtractorSP contiguous_extractor_sp =
|
|
extractor_sp->GetContiguousDataExtractorSP();
|
|
if (!IsCOFFObjectFile(contiguous_extractor_sp->GetData()))
|
|
return nullptr;
|
|
|
|
if (contiguous_extractor_sp->GetByteSize() < length) {
|
|
DataBufferSP data_sp = MapFileData(*file, length, file_offset);
|
|
if (!data_sp) {
|
|
LLDB_LOG(log,
|
|
"Failed to create ObjectFileCOFF instance: cannot read file {0}",
|
|
file->GetPath());
|
|
return nullptr;
|
|
}
|
|
contiguous_extractor_sp =
|
|
std::make_shared<lldb_private::DataExtractor>(data_sp);
|
|
data_offset = 0;
|
|
}
|
|
|
|
MemoryBufferRef buffer{toStringRef(contiguous_extractor_sp->GetData()),
|
|
file->GetFilename().GetStringRef()};
|
|
|
|
Expected<std::unique_ptr<Binary>> binary = createBinary(buffer);
|
|
if (!binary) {
|
|
LLDB_LOG_ERROR(log, binary.takeError(),
|
|
"Failed to create binary for file ({1}): {0}",
|
|
file->GetPath());
|
|
return nullptr;
|
|
}
|
|
|
|
LLDB_LOG(log, "ObjectFileCOFF::ObjectFileCOFF module = {1} ({2}), file = {3}",
|
|
module_sp.get(), module_sp->GetSpecificationDescription(),
|
|
file->GetPath());
|
|
|
|
return new ObjectFileCOFF(unique_dyn_cast<COFFObjectFile>(std::move(*binary)),
|
|
module_sp, contiguous_extractor_sp, data_offset,
|
|
file, file_offset, length);
|
|
}
|
|
|
|
lldb_private::ObjectFile *ObjectFileCOFF::CreateMemoryInstance(
|
|
const ModuleSP &module_sp, WritableDataBufferSP data_sp,
|
|
const ProcessSP &process_sp, addr_t header) {
|
|
// FIXME: do we need to worry about construction from a memory region?
|
|
return nullptr;
|
|
}
|
|
|
|
size_t ObjectFileCOFF::GetModuleSpecifications(
|
|
const FileSpec &file, DataExtractorSP &extractor_sp, offset_t data_offset,
|
|
offset_t file_offset, offset_t length, ModuleSpecList &specs) {
|
|
if (!extractor_sp || !extractor_sp->HasData())
|
|
return 0;
|
|
|
|
// If this is opearting on a VirtualDataExtractor, it can have
|
|
// gaps between valid bytes in the DataBuffer. We extract an
|
|
// ArrayRef of the raw bytes, and can segfault.
|
|
DataExtractorSP contiguous_extractor_sp =
|
|
extractor_sp->GetContiguousDataExtractorSP();
|
|
if (!contiguous_extractor_sp)
|
|
return 0;
|
|
if (!IsCOFFObjectFile(contiguous_extractor_sp->GetData()))
|
|
return 0;
|
|
|
|
MemoryBufferRef buffer{toStringRef(contiguous_extractor_sp->GetData()),
|
|
file.GetFilename().GetStringRef()};
|
|
Expected<std::unique_ptr<Binary>> binary = createBinary(buffer);
|
|
if (!binary) {
|
|
Log *log = GetLog(LLDBLog::Object);
|
|
LLDB_LOG_ERROR(log, binary.takeError(),
|
|
"Failed to create binary for file ({1}): {0}",
|
|
file.GetFilename());
|
|
return 0;
|
|
}
|
|
|
|
std::unique_ptr<COFFObjectFile> object =
|
|
unique_dyn_cast<COFFObjectFile>(std::move(*binary));
|
|
switch (static_cast<COFF::MachineTypes>(object->getMachine())) {
|
|
case COFF::IMAGE_FILE_MACHINE_I386:
|
|
specs.Append(ModuleSpec(file, ArchSpec("i686-unknown-windows-msvc")));
|
|
return 1;
|
|
case COFF::IMAGE_FILE_MACHINE_AMD64:
|
|
specs.Append(ModuleSpec(file, ArchSpec("x86_64-unknown-windows-msvc")));
|
|
return 1;
|
|
case COFF::IMAGE_FILE_MACHINE_ARMNT:
|
|
specs.Append(ModuleSpec(file, ArchSpec("armv7-unknown-windows-msvc")));
|
|
return 1;
|
|
case COFF::IMAGE_FILE_MACHINE_ARM64:
|
|
specs.Append(ModuleSpec(file, ArchSpec("aarch64-unknown-windows-msvc")));
|
|
return 1;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
void ObjectFileCOFF::Dump(Stream *stream) {
|
|
ModuleSP module(GetModule());
|
|
if (!module)
|
|
return;
|
|
|
|
std::lock_guard<std::recursive_mutex> guard(module->GetMutex());
|
|
|
|
stream->Printf("%p: ", static_cast<void *>(this));
|
|
stream->Indent();
|
|
stream->PutCString("ObjectFileCOFF");
|
|
*stream << ", file = '" << m_file
|
|
<< "', arch = " << GetArchitecture().GetArchitectureName() << '\n';
|
|
|
|
if (SectionList *sections = GetSectionList())
|
|
sections->Dump(stream->AsRawOstream(), stream->GetIndentLevel(), nullptr,
|
|
true, std::numeric_limits<uint32_t>::max());
|
|
}
|
|
|
|
uint32_t ObjectFileCOFF::GetAddressByteSize() const {
|
|
return const_cast<ObjectFileCOFF *>(this)->GetArchitecture().GetAddressByteSize();
|
|
}
|
|
|
|
ArchSpec ObjectFileCOFF::GetArchitecture() {
|
|
switch (static_cast<COFF::MachineTypes>(m_object->getMachine())) {
|
|
case COFF::IMAGE_FILE_MACHINE_I386:
|
|
return ArchSpec("i686-unknown-windows-msvc");
|
|
case COFF::IMAGE_FILE_MACHINE_AMD64:
|
|
return ArchSpec("x86_64-unknown-windows-msvc");
|
|
case COFF::IMAGE_FILE_MACHINE_ARMNT:
|
|
return ArchSpec("armv7-unknown-windows-msvc");
|
|
case COFF::IMAGE_FILE_MACHINE_ARM64:
|
|
return ArchSpec("aarch64-unknown-windows-msvc");
|
|
default:
|
|
return ArchSpec();
|
|
}
|
|
}
|
|
|
|
void ObjectFileCOFF::CreateSections(lldb_private::SectionList §ions) {
|
|
if (m_sections_up)
|
|
return;
|
|
|
|
m_sections_up = std::make_unique<SectionList>();
|
|
ModuleSP module(GetModule());
|
|
if (!module)
|
|
return;
|
|
|
|
std::lock_guard<std::recursive_mutex> guard(module->GetMutex());
|
|
|
|
auto SectionType = [](StringRef Name,
|
|
const coff_section *Section) -> lldb::SectionType {
|
|
// DWARF Debug Sections
|
|
if (Name.consume_front(".debug_"))
|
|
return GetDWARFSectionTypeFromName(Name);
|
|
|
|
lldb::SectionType type = StringSwitch<lldb::SectionType>(Name)
|
|
// CodeView Debug Sections: .debug$S, .debug$T
|
|
.StartsWith(".debug$", eSectionTypeDebug)
|
|
.Case("clangast", eSectionTypeOther)
|
|
.Default(eSectionTypeInvalid);
|
|
if (type != eSectionTypeInvalid)
|
|
return type;
|
|
|
|
if (Section->Characteristics & COFF::IMAGE_SCN_CNT_CODE)
|
|
return eSectionTypeCode;
|
|
if (Section->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
|
|
return eSectionTypeData;
|
|
if (Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
|
|
return Section->SizeOfRawData ? eSectionTypeData : eSectionTypeZeroFill;
|
|
return eSectionTypeOther;
|
|
};
|
|
auto Permissions = [](const object::coff_section *Section) -> uint32_t {
|
|
uint32_t permissions = 0;
|
|
if (Section->Characteristics & COFF::IMAGE_SCN_MEM_EXECUTE)
|
|
permissions |= lldb::ePermissionsExecutable;
|
|
if (Section->Characteristics & COFF::IMAGE_SCN_MEM_READ)
|
|
permissions |= lldb::ePermissionsReadable;
|
|
if (Section->Characteristics & COFF::IMAGE_SCN_MEM_WRITE)
|
|
permissions |= lldb::ePermissionsWritable;
|
|
return permissions;
|
|
};
|
|
|
|
for (const auto &SecRef : m_object->sections()) {
|
|
const auto COFFSection = m_object->getCOFFSection(SecRef);
|
|
|
|
llvm::Expected<StringRef> Name = SecRef.getName();
|
|
StringRef SectionName = Name ? *Name : COFFSection->Name;
|
|
if (!Name)
|
|
consumeError(Name.takeError());
|
|
|
|
SectionSP section =
|
|
std::make_unique<Section>(module, this,
|
|
static_cast<user_id_t>(SecRef.getIndex()),
|
|
ConstString(SectionName),
|
|
SectionType(SectionName, COFFSection),
|
|
COFFSection->VirtualAddress,
|
|
COFFSection->VirtualSize,
|
|
COFFSection->PointerToRawData,
|
|
COFFSection->SizeOfRawData,
|
|
COFFSection->getAlignment(),
|
|
0);
|
|
section->SetPermissions(Permissions(COFFSection));
|
|
|
|
m_sections_up->AddSection(section);
|
|
sections.AddSection(section);
|
|
}
|
|
}
|
|
|
|
void ObjectFileCOFF::ParseSymtab(lldb_private::Symtab &symtab) {
|
|
Log *log = GetLog(LLDBLog::Object);
|
|
|
|
SectionList *sections = GetSectionList();
|
|
symtab.Reserve(symtab.GetNumSymbols() + m_object->getNumberOfSymbols());
|
|
|
|
auto SymbolType = [](const COFFSymbolRef &Symbol) -> lldb::SymbolType {
|
|
if (Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION)
|
|
return eSymbolTypeCode;
|
|
if (Symbol.getBaseType() == COFF::IMAGE_SYM_TYPE_NULL &&
|
|
Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_NULL)
|
|
return eSymbolTypeData;
|
|
return eSymbolTypeInvalid;
|
|
};
|
|
|
|
for (const auto &SymRef : m_object->symbols()) {
|
|
const auto COFFSymRef = m_object->getCOFFSymbol(SymRef);
|
|
|
|
Expected<StringRef> NameOrErr = SymRef.getName();
|
|
if (!NameOrErr) {
|
|
LLDB_LOG_ERROR(log, NameOrErr.takeError(),
|
|
"ObjectFileCOFF: failed to get symbol name: {0}");
|
|
continue;
|
|
}
|
|
|
|
Symbol symbol;
|
|
symbol.GetMangled().SetValue(ConstString(*NameOrErr));
|
|
|
|
int16_t SecIdx = static_cast<int16_t>(COFFSymRef.getSectionNumber());
|
|
if (SecIdx == COFF::IMAGE_SYM_ABSOLUTE) {
|
|
symbol.GetAddressRef() = Address{COFFSymRef.getValue()};
|
|
symbol.SetType(eSymbolTypeAbsolute);
|
|
} else if (SecIdx >= 1) {
|
|
symbol.GetAddressRef() = Address(sections->GetSectionAtIndex(SecIdx - 1),
|
|
COFFSymRef.getValue());
|
|
symbol.SetType(SymbolType(COFFSymRef));
|
|
}
|
|
|
|
symtab.AddSymbol(symbol);
|
|
}
|
|
|
|
LLDB_LOG(log, "ObjectFileCOFF::ParseSymtab processed {0} symbols",
|
|
m_object->getNumberOfSymbols());
|
|
}
|
|
|
|
bool ObjectFileCOFF::ParseHeader() {
|
|
ModuleSP module(GetModule());
|
|
if (!module)
|
|
return false;
|
|
|
|
std::lock_guard<std::recursive_mutex> guard(module->GetMutex());
|
|
|
|
m_data_nsp->SetByteOrder(eByteOrderLittle);
|
|
m_data_nsp->SetAddressByteSize(GetAddressByteSize());
|
|
|
|
return true;
|
|
}
|