Summary:
Currently, the GPU iterates through all of the present symbols and
copies them by prefix. This is inefficient as it requires a lot of small
high-latency data transfers rather than a few large ones. Additionally,
we force every single profiling symbol to have protected visibility.
This means potentially hundreds of unnecessary symbols in the symbol
table.
This PR changes the interface to move towards start / stop section
handling. AMDGPU supports this natively as an ELF target, so few
changes are needed. Instead of overriding visibility, we use a single table
to define the bounds, which we can obtain with one contiguous load.
Using a table interface should also work for the in-progress HIP
implementation for this, as it wraps the start / stop sections into
standard void pointers which will be inside of an already mapped region
of memory, so they should be accessible from the HIP API.
NVPTX is more difficult as it is an ELF platform without this support. I
have hooked up the 'Other' handling to work around this, but even then
it's a bit of a stretch. I could remove this support here, but I wanted
to demonstrate that we can share the ABI. However, NVPTX will only work
if we force LTO and change the backend to emit variables in the same
TL;DR, we now do this:
```c
struct { start1, stop1, start2, stop2, start3, stop3, version; } device;
struct host = DtoH(lookup("device"));
counters = DtoH(host.stop - host.start)
version = DtoH(host.version);
```
257 lines
10 KiB
C
257 lines
10 KiB
C
/*===- InstrProfilingPlatformLinux.c - Profile data Linux platform ------===*\
|
|
|*
|
|
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|* See https://llvm.org/LICENSE.txt for license information.
|
|
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|*
|
|
\*===----------------------------------------------------------------------===*/
|
|
|
|
// This file defines profile data symbols for ELF, wasm, XCOFF. It assumes
|
|
// __start_ and __stop_ symbols for profile data point at the beginning and
|
|
// end of the sections in question. (This is technically a linker feature,
|
|
// not a file format feature, but linkers for these targets support it.)
|
|
//
|
|
// MachO (MacOS/iOS) and PE-COFF (Windows) have similar support, but the
// identifiers are different, so the support is in separate files.
|
|
//
|
|
// Support for targets which don't have linker support is in
|
|
// InstrProfilingPlatformOther.c.
|
|
//
|
|
// This file also contains code to extract ELF build IDs from the ELF file,
|
|
// to identify the build which generated the file.
|
|
|
|
#if defined(__linux__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
|
|
(defined(__sun__) && defined(__svr4__)) || defined(__NetBSD__) || \
|
|
defined(_AIX) || defined(__wasm__) || defined(__HAIKU__) || \
|
|
(defined(COMPILER_RT_PROFILE_BAREMETAL) && !defined(__NVPTX__))
|
|
|
|
#if !defined(_AIX) && !defined(__wasm__) && \
|
|
!defined(COMPILER_RT_PROFILE_BAREMETAL)
|
|
// Includes for non-baremetal ELF targets, used to output build IDs.
|
|
#include <elf.h>
|
|
#include <link.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#endif
|
|
|
|
#include "InstrProfiling.h"
|
|
#include "InstrProfilingInternal.h"
|
|
|
|
/*
 * Short aliases for the section start/stop symbols of each profiling
 * section. Every alias expands to INSTR_PROF_SECT_START or
 * INSTR_PROF_SECT_STOP applied to the matching *_COMMON section name.
 */

/* Section behind __llvm_profile_{begin,end}_data. */
#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)

/* Section behind __llvm_profile_{begin,end}_names. */
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
#define PROF_NAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_NAME_COMMON)

/* Section behind __llvm_profile_{begin,end}_vtabnames. */
#define PROF_VNAME_START INSTR_PROF_SECT_START(INSTR_PROF_VNAME_COMMON)
#define PROF_VNAME_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNAME_COMMON)

/* Section behind __llvm_profile_{begin,end}_counters. */
#define PROF_CNTS_START INSTR_PROF_SECT_START(INSTR_PROF_CNTS_COMMON)
#define PROF_CNTS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_CNTS_COMMON)

/* Section behind __llvm_profile_{begin,end}_vtables. */
#define PROF_VTABLE_START INSTR_PROF_SECT_START(INSTR_PROF_VTAB_COMMON)
#define PROF_VTABLE_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VTAB_COMMON)

/* Section behind __llvm_profile_{begin,end}_bitmap. */
#define PROF_BITS_START INSTR_PROF_SECT_START(INSTR_PROF_BITS_COMMON)
#define PROF_BITS_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_BITS_COMMON)

/* Section behind __llvm_profile_{begin,end}_vnodes. */
#define PROF_VNODES_START INSTR_PROF_SECT_START(INSTR_PROF_VNODES_COMMON)
#define PROF_VNODES_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_VNODES_COMMON)

/* Section behind __llvm_profile_{begin,end}_covinit. */
#define PROF_COVINIT_START INSTR_PROF_SECT_START(INSTR_PROF_COVINIT_COMMON)
#define PROF_COVINIT_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_COVINIT_COMMON)
|
|
|
|
/* Declare section start and stop symbols for various sections
|
|
* generated by compiler instrumentation.
|
|
*/
|
|
extern __llvm_profile_data PROF_DATA_START COMPILER_RT_VISIBILITY
|
|
COMPILER_RT_WEAK;
|
|
extern __llvm_profile_data PROF_DATA_STOP COMPILER_RT_VISIBILITY
|
|
COMPILER_RT_WEAK;
|
|
extern char PROF_CNTS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_CNTS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern VTableProfData PROF_VTABLE_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern VTableProfData PROF_VTABLE_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_VNAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_VNAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_BITS_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_BITS_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_NAME_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern char PROF_NAME_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern ValueProfNode PROF_VNODES_START COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern ValueProfNode PROF_VNODES_STOP COMPILER_RT_VISIBILITY COMPILER_RT_WEAK;
|
|
extern __llvm_gcov_init_func_struct PROF_COVINIT_START COMPILER_RT_VISIBILITY
|
|
COMPILER_RT_WEAK;
|
|
extern __llvm_gcov_init_func_struct PROF_COVINIT_STOP COMPILER_RT_VISIBILITY
|
|
COMPILER_RT_WEAK;
|
|
|
|
COMPILER_RT_VISIBILITY const __llvm_profile_data *
|
|
__llvm_profile_begin_data(void) {
|
|
return &PROF_DATA_START;
|
|
}
|
|
COMPILER_RT_VISIBILITY const __llvm_profile_data *
|
|
__llvm_profile_end_data(void) {
|
|
return &PROF_DATA_STOP;
|
|
}
|
|
COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_names(void) {
|
|
return &PROF_NAME_START;
|
|
}
|
|
COMPILER_RT_VISIBILITY const char *__llvm_profile_end_names(void) {
|
|
return &PROF_NAME_STOP;
|
|
}
|
|
COMPILER_RT_VISIBILITY const char *__llvm_profile_begin_vtabnames(void) {
|
|
return &PROF_VNAME_START;
|
|
}
|
|
COMPILER_RT_VISIBILITY const char *__llvm_profile_end_vtabnames(void) {
|
|
return &PROF_VNAME_STOP;
|
|
}
|
|
COMPILER_RT_VISIBILITY const VTableProfData *
|
|
__llvm_profile_begin_vtables(void) {
|
|
return &PROF_VTABLE_START;
|
|
}
|
|
COMPILER_RT_VISIBILITY const VTableProfData *__llvm_profile_end_vtables(void) {
|
|
return &PROF_VTABLE_STOP;
|
|
}
|
|
COMPILER_RT_VISIBILITY char *__llvm_profile_begin_counters(void) {
|
|
return &PROF_CNTS_START;
|
|
}
|
|
COMPILER_RT_VISIBILITY char *__llvm_profile_end_counters(void) {
|
|
return &PROF_CNTS_STOP;
|
|
}
|
|
COMPILER_RT_VISIBILITY char *__llvm_profile_begin_bitmap(void) {
|
|
return &PROF_BITS_START;
|
|
}
|
|
COMPILER_RT_VISIBILITY char *__llvm_profile_end_bitmap(void) {
|
|
return &PROF_BITS_STOP;
|
|
}
|
|
|
|
/* Returns the address of the start symbol of the value-node section. */
COMPILER_RT_VISIBILITY ValueProfNode *
__llvm_profile_begin_vnodes(void) {
  ValueProfNode *First = &PROF_VNODES_START;
  return First;
}
|
|
/* Returns the address of the stop symbol of the value-node section. */
COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
  ValueProfNode *Last = &PROF_VNODES_STOP;
  return Last;
}
|
|
/*
 * Global value-node pointers, initialised to the start and stop symbols of
 * the value-node section. They are defined here for use by other parts of
 * the runtime (their consumers are not visible in this file).
 */
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;
|
|
|
|
COMPILER_RT_VISIBILITY const __llvm_gcov_init_func_struct *
|
|
__llvm_profile_begin_covinit() {
|
|
return &PROF_COVINIT_START;
|
|
}
|
|
|
|
COMPILER_RT_VISIBILITY const __llvm_gcov_init_func_struct *
|
|
__llvm_profile_end_covinit() {
|
|
return &PROF_COVINIT_STOP;
|
|
}
|
|
|
|
#ifdef NT_GNU_BUILD_ID
|
|
/*
 * Rounds size up to the next multiple of align. The mask arithmetic is only
 * correct when align is a power of two; the callers in this file pass 4.
 */
static size_t RoundUp(size_t size, size_t align) {
  const size_t Mask = align - 1;
  return (size + Mask) & ~Mask;
}
|
|
|
|
/*
|
|
* Look for the note that has the name "GNU\0" and type NT_GNU_BUILD_ID
|
|
* that contains build id. If build id exists, write binary id.
|
|
*
|
|
* Each note in notes section starts with a struct which includes
|
|
* n_namesz, n_descsz, and n_type members. It is followed by the name
|
|
* (whose length is defined in n_namesz) and then by the descriptor
|
|
* (whose length is defined in n_descsz).
|
|
*
|
|
* Note sections like .note.ABI-tag and .note.gnu.build-id are aligned
|
|
* to 4 bytes, so round n_namesz and n_descsz to the nearest 4 bytes.
|
|
*/
|
|
static int WriteBinaryIdForNote(ProfDataWriter *Writer,
|
|
const ElfW(Nhdr) * Note) {
|
|
int BinaryIdSize = 0;
|
|
const char *NoteName = (const char *)Note + sizeof(ElfW(Nhdr));
|
|
if (Note->n_type == NT_GNU_BUILD_ID && Note->n_namesz == 4 &&
|
|
memcmp(NoteName, "GNU\0", 4) == 0) {
|
|
uint64_t BinaryIdLen = Note->n_descsz;
|
|
const uint8_t *BinaryIdData =
|
|
(const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4));
|
|
uint8_t BinaryIdPadding = __llvm_profile_get_num_padding_bytes(BinaryIdLen);
|
|
if (Writer != NULL &&
|
|
lprofWriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData,
|
|
BinaryIdPadding) == -1)
|
|
return -1;
|
|
|
|
BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen + BinaryIdPadding;
|
|
}
|
|
|
|
return BinaryIdSize;
|
|
}
|
|
|
|
/*
|
|
* Helper function that iterates through notes section and find build ids.
|
|
* If writer is given, write binary ids into profiles.
|
|
* If an error happens while writing, return -1.
|
|
*/
|
|
static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
|
|
const ElfW(Nhdr) * NotesEnd) {
|
|
int BinaryIdsSize = 0;
|
|
while (Note < NotesEnd) {
|
|
int OneBinaryIdSize = WriteBinaryIdForNote(Writer, Note);
|
|
if (OneBinaryIdSize == -1)
|
|
return -1;
|
|
BinaryIdsSize += OneBinaryIdSize;
|
|
|
|
/* Calculate the offset of the next note in notes section. */
|
|
size_t NoteOffset = sizeof(ElfW(Nhdr)) + RoundUp(Note->n_namesz, 4) +
|
|
RoundUp(Note->n_descsz, 4);
|
|
Note = (const ElfW(Nhdr) *)((const char *)(Note) + NoteOffset);
|
|
}
|
|
|
|
return BinaryIdsSize;
|
|
}
|
|
|
|
/*
|
|
* Write binary ids into profiles if writer is given.
|
|
* Return the total size of binary ids.
|
|
* If an error happens while writing, return -1.
|
|
*/
|
|
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
|
|
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
|
|
extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
|
|
|
|
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
|
|
const ElfW(Phdr) *ProgramHeader =
|
|
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
|
|
|
|
/* Compute the added base address in case of position-independent code. */
|
|
uintptr_t Base = 0;
|
|
for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
|
|
if (ProgramHeader[I].p_type == PT_PHDR)
|
|
Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
|
|
if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
|
|
Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
|
|
}
|
|
|
|
int TotalBinaryIdsSize = 0;
|
|
/* Iterate through entries in the program header. */
|
|
for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
|
|
/* Look for the notes segment in program header entries. */
|
|
if (ProgramHeader[I].p_type != PT_NOTE)
|
|
continue;
|
|
|
|
/* There can be multiple notes segment, and examine each of them. */
|
|
const ElfW(Nhdr) *Note =
|
|
(const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
|
|
const ElfW(Nhdr) *NotesEnd =
|
|
(const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
|
|
|
|
int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
|
|
if (TotalBinaryIdsSize == -1)
|
|
return -1;
|
|
|
|
TotalBinaryIdsSize += BinaryIdsSize;
|
|
}
|
|
|
|
return TotalBinaryIdsSize;
|
|
}
|
|
#elif !defined(_AIX) /* !NT_GNU_BUILD_ID */
|
|
/*
|
|
* Fallback implementation for targets that don't support the GNU
|
|
* extensions NT_GNU_BUILD_ID and __ehdr_start.
|
|
*/
|
|
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#endif
|