Files
llvm-project/lldb/source/Utility/VirtualDataExtractor.cpp
Jason Molenda af3cc148c3 [lldb][NFC] Add some override methods to VirtualDataExtractor (#179858)
This changes VirtualDataExtractor's GetByteSize to return the virtual
byte size of the buffer (external users only understand the data
contents in terms of the virtual sizes & offsets). There are check
methods in DataExtractor that check they are not going off the end of a
buffer, they usually use the BytesLeft() method. There are a couple of
callers of BytesLeft() externally, but it is predominantly an internal
use API. I have BytesLeft() use the physical size of the buffer, not the
virtual size, for the benefit of the DataExtractor methods. (and to
avoid duplicating all of them down in VirtualDataExtractor)

Another problem is the we call SetData on DataExtractorSP's (e.g. see
the ObjectFile ctor) with the DataBuffer it already has, an offset of 0,
and the GetByteSize. A no-op for a DataExtractor that is already using
that DataBuffer. But SetData would try to use that length as a physical
size, and truncate the buffer that the DataExtractor would accept.

I added VirtualDataExtractor subclass methods for the SetData's, detect
(1) data being added to an uninitialized DataExtractor, (2) the same
data / offset / length as currently being used is added to the
DataExtractor (a no-op), or (3) we're genuinely changing the data source
or setting an offset / length that is different. This final case we're
not ready to handle today, I added asserts for them so we can catch it
in debug builds, and then I clear the LookupTable and add a no-op entry
so this extractor will behave like a plain DataExtractor -- because I
don't know better to do. If we genuinely need to handle this case, and
I'm pretty sure we don't need to, I'd have to assume that we're taking a
subset of the original data source (an offset & length), so we'd need to
update all of the LookupTable entries to reflect the new offsets, and
remove entries that are no longer referring to the subsetted range. I'll
leave that until there's any evidence it's actually needed.

rdar://148939795
2026-02-05 15:10:54 -08:00

315 lines
12 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Utility/VirtualDataExtractor.h"
#include <cassert>
using namespace lldb;
using namespace lldb_private;
VirtualDataExtractor::VirtualDataExtractor(const void *data,
offset_t data_length,
ByteOrder byte_order,
uint32_t addr_size,
LookupTable lookup_table)
: DataExtractor(data, data_length, byte_order, addr_size),
m_lookup_table(std::move(lookup_table)) {
m_lookup_table.Sort();
}
VirtualDataExtractor::VirtualDataExtractor(const DataBufferSP &data_sp,
ByteOrder byte_order,
uint32_t addr_size,
LookupTable lookup_table)
: DataExtractor(data_sp, byte_order, addr_size),
m_lookup_table(std::move(lookup_table)) {
m_lookup_table.Sort();
}
VirtualDataExtractor::VirtualDataExtractor(const DataBufferSP &data_sp,
LookupTable lookup_table)
: DataExtractor(data_sp), m_lookup_table(std::move(lookup_table)) {
m_lookup_table.Sort();
}
const VirtualDataExtractor::LookupTable::Entry *
VirtualDataExtractor::FindEntry(offset_t virtual_addr) const {
// Use RangeDataVector's binary search instead of linear search.
return m_lookup_table.FindEntryThatContains(virtual_addr);
}
bool VirtualDataExtractor::ValidateVirtualRead(offset_t virtual_addr,
offset_t length) const {
const LookupTable::Entry *entry = FindEntry(virtual_addr);
if (!entry)
return false;
// Assert that the read does not cross entry boundaries.
// RangeData.Contains() checks if a range is fully contained.
assert(entry->Contains(LookupTable::Range(virtual_addr, length)) &&
"Read crosses lookup table entry boundary");
// Also validate that the physical offset is within the data buffer.
// RangeData.data contains the physical offset.
offset_t physical_offset = entry->data + (virtual_addr - entry->base);
return ValidOffsetForDataOfSize(physical_offset, length);
}
const void *VirtualDataExtractor::GetData(offset_t *offset_ptr,
offset_t length) const {
// Override to treat offset as virtual address.
if (!offset_ptr)
return nullptr;
offset_t virtual_addr = *offset_ptr;
if (!ValidateVirtualRead(virtual_addr, length))
return nullptr;
const LookupTable::Entry *entry = FindEntry(virtual_addr);
assert(entry && "ValidateVirtualRead should have found an entry");
offset_t physical_offset = entry->data + (virtual_addr - entry->base);
// Use base class PeekData directly to avoid recursion.
const void *result = DataExtractor::PeekData(physical_offset, length);
if (result) {
// Advance the virtual offset pointer.
*offset_ptr += length;
}
return result;
}
offset_t VirtualDataExtractor::SetData(const void *bytes, lldb::offset_t length,
lldb::ByteOrder byte_order) {
// Invoked from the base class ctor.
if (!m_data_sp || m_start == nullptr)
return DataExtractor::SetData(bytes, length, byte_order);
// A no-op SetData that is setting the same data buffer again.
if (!m_data_sp && m_start == bytes && length == GetVirtualByteSize())
return GetVirtualByteSize();
assert("SetData(1) called on VirtualDataExtractor that already had data" &&
false);
DataExtractor::SetData(bytes, length, byte_order);
ResetLookupTableToMatchPhysical();
return GetVirtualByteSize();
}
offset_t VirtualDataExtractor::SetData(const DataExtractor &data,
lldb::offset_t offset,
lldb::offset_t length) {
// Invoked from the base class ctor
if (!m_data_sp || m_start == nullptr)
return DataExtractor::SetData(data, offset, length);
// A no-op SetData that is setting the same data buffer again
if (m_data_sp && data.GetSharedDataBuffer().get() == m_data_sp.get() &&
offset == 0 && length == GetVirtualByteSize())
return GetVirtualByteSize();
assert("SetData(2) called on VirtualDataExtractor that already had data" &&
false);
DataExtractor::SetData(data, offset, length);
ResetLookupTableToMatchPhysical();
return GetVirtualByteSize();
}
offset_t VirtualDataExtractor::SetData(const lldb::DataBufferSP &data_sp,
lldb::offset_t offset,
lldb::offset_t length) {
// Invoked from the base class ctor
if (!m_data_sp || m_start == nullptr)
return DataExtractor::SetData(data_sp, offset, length);
// A no-op SetData that is setting the same data buffer again
if (m_data_sp && data_sp.get() == m_data_sp.get() && offset == 0 &&
length == GetVirtualByteSize())
return GetVirtualByteSize();
assert("SetData(3) called on VirtualDataExtractor that already had data" &&
false);
DataExtractor::SetData(data_sp, offset, length);
ResetLookupTableToMatchPhysical();
return GetVirtualByteSize();
}
void VirtualDataExtractor::ResetLookupTableToMatchPhysical() {
// calling SetData on a VirtualDataExtractor that already has a
// data buffer means the LookupTable needs to be either replaced, or
// if we assume the buffer is a subset of the original, we need to
// update all the entries to have correct new offsets into the buffer
// and remove entries that are outside the new range.
// For now, zero out the LookupTable and behave as if this is a simple
// DataExtractor.
m_lookup_table.Clear();
m_lookup_table.Append(
VirtualDataExtractor::LookupTable::Entry{0, GetPhysicalByteSize(), 0});
}
uint64_t VirtualDataExtractor::GetVirtualByteSize() const {
offset_t lowest = -1ULL;
offset_t highest = 0;
for (const auto ent : m_lookup_table) {
lowest = std::min(lowest, ent.base);
highest = std::max(highest, ent.base + ent.size);
}
return highest - lowest;
}
uint64_t VirtualDataExtractor::GetPhysicalByteSize() const {
return m_end - m_start;
}
offset_t VirtualDataExtractor::VirtualBytesLeft(offset_t virtual_offset) const {
const offset_t size = GetVirtualByteSize();
if (size > virtual_offset)
return size - virtual_offset;
return 0;
}
offset_t
VirtualDataExtractor::PhysicalBytesLeft(offset_t physical_offset) const {
const offset_t size = m_end - m_start;
if (size > physical_offset)
return size - physical_offset;
return 0;
}
const uint8_t *VirtualDataExtractor::PeekData(offset_t offset,
offset_t length) const {
// Override to treat offset as virtual address.
if (!ValidateVirtualRead(offset, length))
return nullptr;
const LookupTable::Entry *entry = FindEntry(offset);
assert(entry && "ValidateVirtualRead should have found an entry");
offset_t physical_offset = entry->data + (offset - entry->base);
// Use the base class PeekData with the physical offset.
return DataExtractor::PeekData(physical_offset, length);
}
uint8_t VirtualDataExtractor::GetU8_unchecked(offset_t *offset_ptr) const {
offset_t virtual_addr = *offset_ptr;
const LookupTable::Entry *entry = FindEntry(virtual_addr);
assert(entry && "Unchecked methods require valid virtual address");
offset_t physical_offset = entry->data + (virtual_addr - entry->base);
uint8_t result = DataExtractor::GetU8_unchecked(&physical_offset);
*offset_ptr += 1;
return result;
}
uint16_t VirtualDataExtractor::GetU16_unchecked(offset_t *offset_ptr) const {
offset_t virtual_addr = *offset_ptr;
const LookupTable::Entry *entry = FindEntry(virtual_addr);
assert(entry && "Unchecked methods require valid virtual address");
offset_t physical_offset = entry->data + (virtual_addr - entry->base);
uint16_t result = DataExtractor::GetU16_unchecked(&physical_offset);
*offset_ptr += 2;
return result;
}
uint32_t VirtualDataExtractor::GetU32_unchecked(offset_t *offset_ptr) const {
offset_t virtual_addr = *offset_ptr;
const LookupTable::Entry *entry = FindEntry(virtual_addr);
assert(entry && "Unchecked methods require valid virtual address");
offset_t physical_offset = entry->data + (virtual_addr - entry->base);
uint32_t result = DataExtractor::GetU32_unchecked(&physical_offset);
*offset_ptr += 4;
return result;
}
uint64_t VirtualDataExtractor::GetU64_unchecked(offset_t *offset_ptr) const {
offset_t virtual_addr = *offset_ptr;
const LookupTable::Entry *entry = FindEntry(virtual_addr);
assert(entry && "Unchecked methods require valid virtual address");
offset_t physical_offset = entry->data + (virtual_addr - entry->base);
uint64_t result = DataExtractor::GetU64_unchecked(&physical_offset);
*offset_ptr += 8;
return result;
}
DataExtractorSP
VirtualDataExtractor::GetSubsetExtractorSP(offset_t virtual_offset,
offset_t virtual_length) {
const LookupTable::Entry *entry = FindEntry(virtual_offset);
assert(
entry &&
"VirtualDataExtractor subset extractor requires valid virtual address");
if (!entry)
return {};
// Entry::data is the offset into the DataBuffer's actual start/end range
// Entry::base is the virtual address at the start of this region of data
offset_t offset_into_entry_range = virtual_offset - entry->base;
assert(
offset_into_entry_range + virtual_length <= entry->size &&
"VirtualDataExtractor subset may not span multiple LookupTable entries");
if (offset_into_entry_range + virtual_length > entry->size)
return {};
// We could support a Subset VirtualDataExtractor which covered
// multiple LookupTable virtual entries, but we'd need to mutate
// all of the LookupTable entries that were properly included in
// the Subset, a bit tricky. So we won't implement that until it's
// needed.
offset_t physical_start = entry->data + offset_into_entry_range;
std::shared_ptr<DataExtractor> new_sp = std::make_shared<DataExtractor>(
GetSharedDataBuffer(), GetByteOrder(), GetAddressByteSize());
new_sp->SetData(GetSharedDataBuffer(), physical_start, virtual_length);
return new_sp;
}
// Return a DataExtractorSP that contains a single LookupTable's entry; all
// bytes are guaranteed to be readable.
DataExtractorSP
VirtualDataExtractor::GetSubsetExtractorSP(offset_t virtual_offset) {
const LookupTable::Entry *entry = FindEntry(virtual_offset);
assert(
entry &&
"VirtualDataExtractor subset extractor requires valid virtual address");
if (!entry)
return {};
// Entry::data is the offset into the DataBuffer's actual start/end range
// Entry::base is the virtual address at the start of this region of data
offset_t offset_into_entry_range = virtual_offset - entry->base;
offset_t physical_start = entry->data + offset_into_entry_range;
std::shared_ptr<DataExtractor> new_sp = std::make_shared<DataExtractor>(
GetSharedDataBuffer(), GetByteOrder(), GetAddressByteSize());
new_sp->SetData(GetSharedDataBuffer(), physical_start,
entry->size - offset_into_entry_range);
return new_sp;
}
// Return an ArrayRef to the first contiguous region of the LookupTable
// only. The LookupTable entries may have gaps of unmapped data, and we
// can't include those in the ArrayRef or something may touch those pages.
llvm::ArrayRef<uint8_t> VirtualDataExtractor::GetData() const {
const LookupTable::Entry *entry = FindEntry(0);
assert(entry &&
"VirtualDataExtractor GetData requires valid virtual address");
if (!entry)
return {};
return {m_start + static_cast<size_t>(entry->data), static_cast<size_t>(entry->size)};
}