Files
Kevin Sala Penades 802de7ebd1 [offload] Allow replay repetitions and report basic timing (#193388)
This commit extends the kernel replay tool to perform multiple replay
repetitions on the same process. It also prints the execution time of
the kernel replay, which includes the kernel launch and kernel
synchronization (replay I/O time is excluded). Precise kernel timing
should be obtained through the corresponding profiling tools for now.

The output report after recording has been improved as well.
2026-04-22 15:22:23 -07:00

89 lines
3.3 KiB
C++

//===---------- private.h - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Private function declarations and helper macros for debugging output.
//
//===----------------------------------------------------------------------===//
#ifndef _OMPTARGET_PRIVATE_H
#define _OMPTARGET_PRIVATE_H
#include "Shared/Debug.h"
#include "Shared/SourceInfo.h"
#include "OpenMP/InternalTypes.h"
#include "device.h"
#include "omptarget.h"
#include <cstdint>
/// Declared here, defined in another translation unit. Takes the source
/// location \p Loc, the device \p Device, a host pointer \p HostPtr, the kernel
/// arguments \p KernelArgs and the asynchronous state \p AsyncInfo.
/// NOTE(review): presumably launches the target region associated with
/// \p HostPtr on \p Device; the meaning of the int result is not visible from
/// this header -- confirm against the definition.
extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
/// Declared here, defined in another translation unit.
/// NOTE(review): "rr" presumably stands for record/replay (cf. \p IsRecord);
/// this looks like the switch that enables kernel recording or replaying on
/// \p Device. The exact meaning of \p MemorySize, \p ReqAddr, \p SaveOutput,
/// \p EmitReport and \p OutputDirPath, and of the int result, cannot be
/// determined from this header -- confirm against the definition.
extern int target_activate_rr(DeviceTy &Device, uint64_t MemorySize,
void *ReqAddr, bool IsRecord, bool SaveOutput,
bool EmitReport, const char *OutputDirPath);
/// Declared here, defined in another translation unit.
/// NOTE(review): presumably re-executes a previously recorded kernel on
/// \p Device using the supplied device memory image (\p DeviceMemory,
/// \p DeviceMemorySize), global entries (\p Globals, \p NumGlobals), kernel
/// arguments (\p TgtArgs, \p TgtOffsets, \p NumArgs) and launch configuration
/// (\p NumTeams, \p ThreadLimit, \p SharedMemorySize, \p LoopTripCount).
/// \p ReplayOutcome is passed by pointer and is presumably filled in by the
/// callee. None of this is visible from this header -- confirm against the
/// definition, including the meaning of the int result.
extern int
target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr, void *DeviceMemory,
int64_t DeviceMemorySize, void *ReuseDeviceAlloc,
const llvm::offloading::EntryTy *Globals, int32_t NumGlobals,
void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t NumArgs,
int32_t NumTeams, int32_t ThreadLimit, uint32_t SharedMemorySize,
uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo,
KernelReplayOutcomeTy *ReplayOutcome);
/// Declared here, defined in another translation unit.
/// NOTE(review): presumably reacts to the result of a target operation
/// (\p Success) that originated at source location \p Loc; what happens on
/// failure is not visible from this header -- confirm against the definition.
extern void handleTargetOutcome(bool Success, ident_t *Loc);
////////////////////////////////////////////////////////////////////////////////
/// Print out the names and properties of the arguments to each kernel
static inline void
printKernelArguments(const ident_t *Loc, const int64_t DeviceId,
const int32_t ArgNum, const int64_t *ArgSizes,
const int64_t *ArgTypes, const map_var_info_t *ArgNames,
const char *RegionType) {
SourceInfo Info(Loc);
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s at %s:%d:%d with %d arguments:\n",
RegionType, Info.getFilename(), Info.getLine(), Info.getColumn(),
ArgNum);
for (int32_t I = 0; I < ArgNum; ++I) {
const map_var_info_t VarName = (ArgNames) ? ArgNames[I] : nullptr;
const char *Type = nullptr;
const char *Implicit =
(ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : "";
if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH &&
ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS)
Type = "attach:always";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH)
Type = "attach";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO &&
ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
Type = "tofrom";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO)
Type = "to";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
Type = "from";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)
Type = "private";
else if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL)
Type = "firstprivate";
else if (ArgSizes[I] != 0)
Type = "alloc";
else
Type = "use_address";
INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", Type,
getNameFromMapping(VarName).c_str(), ArgSizes[I], Implicit);
}
}
#endif