Reland "[OpenMP][Offload] Handle present/to/from when a different entry did alloc/delete." (#184260)
Some tests that were checking for prints inside/outside `target` regions
needed to be updated to work on systems where the ordering wasn't
deterministic.
Reverts llvm/llvm-project#184240
Original description from #165494:
-----
OpenMP allows cases like the following:
```c
int *p1, *p2, x;
p1 = p2 = &x;
...
#pragma omp target_exit_data map(delete: p1[:]) from(p2[0])
```
Which means, when the runtime encounters the `from` entry, the ref-count
may
not be zero, but it will go down to zero at the end of the current
construct,
which should cause the "from" transfer to happen.
Similarly, a user may have:
```c
struct S {
int *p;
};
#pragma omp declare_mapper (id1: S s) map(s.p) map(present, alloc: s.p[0:10])
#pragma omp declare_mapper (id2: S s) map(s.p, s.p[0:10])
S s1;
// present-check should fail here
#pragma omp target_enter_data map(alloc: s.p[0:10]) map(mapper(id1), to: s)
// "to" should be honored here
#pragma omp target_enter_data map(alloc: s.p[0:10]) map(mapper(id2), to: s)
```
Where the allocation happens before the "to" entry is encountered by the
runtime. Or, an allocation happens before a "present" entry is
encountered.
To handle cases like this, we need to use the state information of
previously
seen new allocations, deletions, "from" entries, when honoring
`to`/`from`/`present` map entries.
-----
This commit is contained in:
@@ -495,20 +495,110 @@ struct AttachMapInfo {
|
||||
MapType(Type), Pointername(Name) {}
|
||||
};
|
||||
|
||||
/// Structure to track ATTACH entries and new allocations across recursive calls
|
||||
/// (for handling mappers) to targetDataBegin for a given construct.
|
||||
struct AttachInfoTy {
|
||||
/// ATTACH map entries for deferred processing.
|
||||
/// Structure to track new allocations, ATTACH entries, DELETE entries and
|
||||
/// skipped FROM data transfer information for a given construct, across
|
||||
/// recursive calls (for handling mappers) to targetDataBegin/targetDataEnd.
|
||||
struct StateInfoTy {
|
||||
/// ATTACH map entries for deferred processing until all other maps are done.
|
||||
llvm::SmallVector<AttachMapInfo> AttachEntries;
|
||||
|
||||
/// Host pointers for which new memory was allocated.
|
||||
/// Key: host pointer, Value: allocation size.
|
||||
llvm::DenseMap<void *, int64_t> NewAllocations;
|
||||
|
||||
AttachInfoTy() = default;
|
||||
/// Host pointers that had a FROM entry, but for which a data transfer was
|
||||
/// skipped due to the ref-count not being zero.
|
||||
/// Key: host pointer, Value: data size.
|
||||
llvm::DenseMap<void *, int64_t> SkippedFromEntries;
|
||||
|
||||
/// Host pointers for which we have triggered a FROM transfer at some point
|
||||
/// during targetDataEnd. It's used to avoid duplicate transfers.
|
||||
/// Key: host pointer, Value: transferred size.
|
||||
llvm::DenseMap<void *, int64_t> TransferredFromEntries;
|
||||
|
||||
/// Starting host address and size of entries whose ref-count went to zero.
|
||||
/// This includes entries released through explicit DELETE, or normal
|
||||
/// ref-count decrements. It's used to ensure transfers are performed for FROM
|
||||
/// entries whose ref-count is already zero when the entry is encountered.
|
||||
/// Key: host pointer, Value: size.
|
||||
llvm::DenseMap<void *, int64_t> ReleasedEntries;
|
||||
|
||||
StateInfoTy() = default;
|
||||
|
||||
// Delete copy constructor and copy assignment operator to prevent copying
|
||||
AttachInfoTy(const AttachInfoTy &) = delete;
|
||||
AttachInfoTy &operator=(const AttachInfoTy &) = delete;
|
||||
StateInfoTy(const StateInfoTy &) = delete;
|
||||
StateInfoTy &operator=(const StateInfoTy &) = delete;
|
||||
|
||||
private:
|
||||
/// Helper to find an entry in \p EntryMap that contains the pointer.
|
||||
/// Returns the matching entry if found, otherwise std::nullopt.
|
||||
std::optional<std::pair<void *, int64_t>>
|
||||
findEntryForPtr(void *Ptr,
|
||||
const llvm::DenseMap<void *, int64_t> &EntryMap) const {
|
||||
for (const auto &Entry : EntryMap) {
|
||||
void *EntryBegin = Entry.first;
|
||||
int64_t EntrySize = Entry.second;
|
||||
if (Ptr >= EntryBegin &&
|
||||
Ptr < static_cast<void *>(static_cast<char *>(EntryBegin) +
|
||||
EntrySize)) {
|
||||
return Entry;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
public:
|
||||
/// Check if a pointer falls within any of the newly allocated ranges.
|
||||
/// Returns the matching entry if found, otherwise std::nullopt.
|
||||
std::optional<std::pair<void *, int64_t>> wasNewlyAllocated(void *Ptr) const {
|
||||
return findEntryForPtr(Ptr, NewAllocations);
|
||||
}
|
||||
|
||||
/// Check if a pointer range [Ptr, Ptr+Size) is fully contained within any
|
||||
/// previously completed FROM transfer.
|
||||
/// Returns the matching entry if found, otherwise std::nullopt.
|
||||
std::optional<std::pair<void *, int64_t>>
|
||||
wasTransferredFrom(void *Ptr, int64_t Size) const {
|
||||
uintptr_t CheckBegin = reinterpret_cast<uintptr_t>(Ptr);
|
||||
uintptr_t CheckEnd = CheckBegin + Size;
|
||||
|
||||
for (const auto &Entry : TransferredFromEntries) {
|
||||
void *RangePtr = Entry.first;
|
||||
int64_t RangeSize = Entry.second;
|
||||
uintptr_t RangeBegin = reinterpret_cast<uintptr_t>(RangePtr);
|
||||
uintptr_t RangeEnd = RangeBegin + RangeSize;
|
||||
|
||||
if (CheckBegin >= RangeBegin && CheckEnd <= RangeEnd) {
|
||||
return Entry;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Check if a pointer falls within any released entry's range.
|
||||
/// Returns the matching entry if found, otherwise std::nullopt.
|
||||
std::optional<std::pair<void *, int64_t>>
|
||||
wasPreviouslyReleased(void *Ptr) const {
|
||||
return findEntryForPtr(Ptr, ReleasedEntries);
|
||||
}
|
||||
|
||||
/// Add a skipped FROM entry. Only updates the entry if this is a new pointer
|
||||
/// or if the new size is larger than the existing entry.
|
||||
void addSkippedFromEntry(void *Ptr, int64_t Size) {
|
||||
auto It = SkippedFromEntries.find(Ptr);
|
||||
if (It == SkippedFromEntries.end() || Size > It->second) {
|
||||
SkippedFromEntries[Ptr] = Size;
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a transferred FROM entry. Only updates the entry if this is a new
|
||||
/// pointer or if the new size is larger than the existing entry.
|
||||
void addTransferredFromEntry(void *Ptr, int64_t Size) {
|
||||
auto It = TransferredFromEntries.find(Ptr);
|
||||
if (It == TransferredFromEntries.end() || Size > It->second) {
|
||||
TransferredFromEntries[Ptr] = Size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Function pointer type for targetData* functions (targetDataBegin,
|
||||
@@ -516,7 +606,7 @@ struct AttachInfoTy {
|
||||
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
|
||||
void **, int64_t *, int64_t *,
|
||||
map_var_info_t *, void **, AsyncInfoTy &,
|
||||
AttachInfoTy *, bool);
|
||||
StateInfoTy *, bool);
|
||||
|
||||
void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device,
|
||||
bool toStdOut = false);
|
||||
@@ -525,24 +615,22 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
AttachInfoTy *AttachInfo = nullptr,
|
||||
bool FromMapper = false);
|
||||
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);
|
||||
|
||||
int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false);
|
||||
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);
|
||||
|
||||
int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
AttachInfoTy *AttachInfo = nullptr,
|
||||
bool FromMapper = false);
|
||||
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);
|
||||
|
||||
// Process deferred ATTACH map entries collected during targetDataBegin.
|
||||
int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo,
|
||||
AsyncInfoTy &AsyncInfo);
|
||||
|
||||
struct MappingInfoTy {
|
||||
@@ -583,7 +671,7 @@ struct MappingInfoTy {
|
||||
bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
|
||||
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
|
||||
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
|
||||
bool ReleaseHDTTMap = true);
|
||||
bool ReleaseHDTTMap = true, StateInfoTy *StateInfo = nullptr);
|
||||
|
||||
/// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
|
||||
/// ensures exclusive access to the HDTT map.
|
||||
|
||||
@@ -209,7 +209,8 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
|
||||
int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
|
||||
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
|
||||
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
|
||||
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
|
||||
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap,
|
||||
StateInfoTy *StateInfo) {
|
||||
|
||||
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR);
|
||||
LR.TPR.Flags.IsPresent = true;
|
||||
@@ -328,10 +329,36 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
|
||||
if (ReleaseHDTTMap)
|
||||
HDTTMap.destroy();
|
||||
|
||||
// If the target pointer is valid, and we need to transfer data, issue the
|
||||
// data transfer.
|
||||
// Lambda to check if this pointer was newly allocated on the current region.
|
||||
// This is needed to handle cases when the TO entry is encountered after an
|
||||
// alloc entry for the same pointer. In such cases, the ref-count is already
|
||||
// non-zero when TO is encountered, but we still need to do a transfer. e.g.
|
||||
//
|
||||
// struct S {
|
||||
// int *p;
|
||||
// };
|
||||
// #pragma omp declare mapper(id : S s) map(to: s.p, s.p[0 : 10])
|
||||
//
|
||||
// S s1;
|
||||
// ...
|
||||
// #pragma omp target map(alloc : s1.p[0 : 10]) map(mapper(id), to : s1)
|
||||
auto WasNewlyAllocatedForCurrentRegion = [&]() {
|
||||
if (!StateInfo)
|
||||
return false;
|
||||
bool WasNewlyAllocated =
|
||||
StateInfo->wasNewlyAllocated(HstPtrBegin).has_value();
|
||||
if (WasNewlyAllocated)
|
||||
ODBG(ODT_Mapping) << "HstPtrBegin " << HstPtrBegin
|
||||
<< " was newly allocated for the current region";
|
||||
return WasNewlyAllocated;
|
||||
};
|
||||
|
||||
// Even if this isn't a new entry, we still need to do a data-transfer if
|
||||
// the pointer was newly allocated on the current target region.
|
||||
if (LR.TPR.TargetPointer && !LR.TPR.Flags.IsHostPointer && HasFlagTo &&
|
||||
(LR.TPR.Flags.IsNewEntry || HasFlagAlways) && Size != 0) {
|
||||
(LR.TPR.Flags.IsNewEntry || HasFlagAlways ||
|
||||
WasNewlyAllocatedForCurrentRegion()) &&
|
||||
Size != 0) {
|
||||
|
||||
// If we have something like:
|
||||
// #pragma omp target map(to: s.myarr[0:10]) map(to: s.myarr[0:10])
|
||||
|
||||
@@ -168,19 +168,22 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
|
||||
|
||||
int Rc = OFFLOAD_SUCCESS;
|
||||
|
||||
// Only allocate AttachInfo for targetDataBegin
|
||||
std::unique_ptr<AttachInfoTy> AttachInfo;
|
||||
if (TargetDataFunction == targetDataBegin)
|
||||
AttachInfo = std::make_unique<AttachInfoTy>();
|
||||
// Allocate StateInfo for targetDataBegin and targetDataEnd to track
|
||||
// allocations, pointer attachments and deferred transfers.
|
||||
// This is not needed for targetDataUpdate.
|
||||
std::unique_ptr<StateInfoTy> StateInfo;
|
||||
if (TargetDataFunction == targetDataBegin ||
|
||||
TargetDataFunction == targetDataEnd)
|
||||
StateInfo = std::make_unique<StateInfoTy>();
|
||||
|
||||
Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
|
||||
AttachInfo.get(), /*FromMapper=*/false);
|
||||
StateInfo.get(), /*FromMapper=*/false);
|
||||
|
||||
if (Rc == OFFLOAD_SUCCESS) {
|
||||
// Process deferred ATTACH entries BEFORE synchronization
|
||||
if (AttachInfo && !AttachInfo->AttachEntries.empty())
|
||||
Rc = processAttachEntries(*DeviceOrErr, *AttachInfo, AsyncInfo);
|
||||
if (StateInfo && !StateInfo->AttachEntries.empty())
|
||||
Rc = processAttachEntries(*DeviceOrErr, *StateInfo, AsyncInfo);
|
||||
|
||||
if (Rc == OFFLOAD_SUCCESS)
|
||||
Rc = AsyncInfo.synchronize();
|
||||
|
||||
@@ -298,7 +298,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
|
||||
int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
|
||||
void *ArgMapper, AsyncInfoTy &AsyncInfo,
|
||||
TargetDataFuncPtrTy TargetDataFunction,
|
||||
AttachInfoTy *AttachInfo = nullptr) {
|
||||
StateInfoTy *StateInfo = nullptr) {
|
||||
ODBG(ODT_Interface) << "Calling the mapper function " << ArgMapper;
|
||||
|
||||
// The mapper function fills up Components.
|
||||
@@ -329,7 +329,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
|
||||
MapperArgsBase.data(), MapperArgs.data(),
|
||||
MapperArgSizes.data(), MapperArgTypes.data(),
|
||||
MapperArgNames.data(), /*arg_mappers*/ nullptr,
|
||||
AsyncInfo, AttachInfo, /*FromMapper=*/true);
|
||||
AsyncInfo, StateInfo, /*FromMapper=*/true);
|
||||
|
||||
return Rc;
|
||||
}
|
||||
@@ -512,9 +512,9 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
AttachInfoTy *AttachInfo, bool FromMapper) {
|
||||
assert(AttachInfo && "AttachInfo must be available for targetDataBegin for "
|
||||
"handling ATTACH map-types.");
|
||||
StateInfoTy *StateInfo, bool FromMapper) {
|
||||
assert(StateInfo && "StateInfo must be available for targetDataBegin for "
|
||||
"handling ATTACH and TO/TOFROM map-types.");
|
||||
// process each input.
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
// Ignore private variables and arrays - there is no mapping for them.
|
||||
@@ -533,7 +533,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
|
||||
targetDataBegin, AttachInfo);
|
||||
targetDataBegin, StateInfo);
|
||||
|
||||
if (Rc != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Call to targetDataBegin via targetDataMapper for custom "
|
||||
@@ -560,7 +560,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// similar to firstprivate (PRIVATE | TO) entries by
|
||||
// PrivateArgumentManager.
|
||||
if (!IsCorrespondingPointerInit)
|
||||
AttachInfo->AttachEntries.emplace_back(
|
||||
StateInfo->AttachEntries.emplace_back(
|
||||
/*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
|
||||
/*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
|
||||
/*PointeeName=*/HstPtrName);
|
||||
@@ -637,7 +637,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
|
||||
// Track new allocation, for eventual use in attachment decision-making.
|
||||
if (PointerTpr.Flags.IsNewEntry && !IsHostPtr)
|
||||
AttachInfo->NewAllocations[HstPtrBase] = sizeof(void *);
|
||||
StateInfo->NewAllocations[HstPtrBase] = sizeof(void *);
|
||||
|
||||
ODBG(ODT_Mapping) << "There are " << sizeof(void *)
|
||||
<< " bytes allocated at target address "
|
||||
@@ -659,7 +659,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
auto TPR = Device.getMappingInfo().getTargetPointer(
|
||||
HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName,
|
||||
HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
|
||||
HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry());
|
||||
HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry(),
|
||||
/*ReleaseHDTTMap=*/true, StateInfo);
|
||||
void *TgtPtrBegin = TPR.TargetPointer;
|
||||
IsHostPtr = TPR.Flags.IsHostPointer;
|
||||
// If data_size==0, then the argument could be a zero-length pointer to
|
||||
@@ -670,11 +671,26 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
: "device failure or illegal mapping")
|
||||
<< ").";
|
||||
return OFFLOAD_FAIL;
|
||||
} else if (TgtPtrBegin && HasPresentModifier &&
|
||||
StateInfo->wasNewlyAllocated(HstPtrBegin).has_value()) {
|
||||
// For "PRESENT" entries, we may have cases like the following:
|
||||
// int *xp = &x[0];
|
||||
// map(alloc: x[:]) map(present, alloc: xp[1])
|
||||
// The "PRESENT" entry may be encountered after a previous entry
|
||||
// allocated new storage for the pointer.
|
||||
// To catch such cases, we need to look at any existing allocations
|
||||
// and error out if we have any matching the pointer.
|
||||
MESSAGE("device mapping required by 'present' map type modifier does not "
|
||||
"exist for host address " DPxMOD " (%" PRId64 " bytes)\n",
|
||||
DPxPTR(HstPtrBegin), DataSize);
|
||||
REPORT() << "Pointer " << HstPtrBegin
|
||||
<< " was not present on the device upon entry to the region.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
// Track new allocation, for eventual use in attachment decision-making.
|
||||
// Track new allocation, for eventual use in attachment/to decision-making.
|
||||
if (TPR.Flags.IsNewEntry && !IsHostPtr && TgtPtrBegin)
|
||||
AttachInfo->NewAllocations[HstPtrBegin] = DataSize;
|
||||
StateInfo->NewAllocations[HstPtrBegin] = DataSize;
|
||||
|
||||
ODBG(ODT_Mapping) << "There are " << DataSize
|
||||
<< " bytes allocated at target address " << TgtPtrBegin
|
||||
@@ -794,24 +810,24 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
///
|
||||
/// For this purpose, we insert a data_fence before the first
|
||||
/// pointer-attachment, (3), to ensure that all pending transfers finish first.
|
||||
int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo,
|
||||
AsyncInfoTy &AsyncInfo) {
|
||||
// Report all tracked allocations from both main loop and ATTACH processing
|
||||
if (!AttachInfo.NewAllocations.empty()) {
|
||||
if (!StateInfo.NewAllocations.empty()) {
|
||||
ODBG_OS(ODT_Mapping, [&](llvm::raw_ostream &OS) {
|
||||
OS << "Tracked " << AttachInfo.NewAllocations.size()
|
||||
OS << "Tracked " << StateInfo.NewAllocations.size()
|
||||
<< " total new allocations:";
|
||||
for (const auto &Alloc : AttachInfo.NewAllocations) {
|
||||
for (const auto &Alloc : StateInfo.NewAllocations) {
|
||||
OS << " Host ptr: " << Alloc.first << ", Size: " << Alloc.second
|
||||
<< " bytes";
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (AttachInfo.AttachEntries.empty())
|
||||
if (StateInfo.AttachEntries.empty())
|
||||
return OFFLOAD_SUCCESS;
|
||||
|
||||
ODBG(ODT_Mapping) << "Processing " << AttachInfo.AttachEntries.size()
|
||||
ODBG(ODT_Mapping) << "Processing " << StateInfo.AttachEntries.size()
|
||||
<< " deferred ATTACH map entries";
|
||||
|
||||
bool TreatAttachAutoAsAlways = MappingConfig::get().TreatAttachAutoAsAlways;
|
||||
@@ -821,9 +837,9 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
|
||||
int Ret = OFFLOAD_SUCCESS;
|
||||
bool IsFirstPointerAttachment = true;
|
||||
for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
|
||||
for (size_t EntryIdx = 0; EntryIdx < StateInfo.AttachEntries.size();
|
||||
++EntryIdx) {
|
||||
const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
|
||||
const auto &AttachEntry = StateInfo.AttachEntries[EntryIdx];
|
||||
|
||||
void **HstPtr = reinterpret_cast<void **>(AttachEntry.PointerBase);
|
||||
|
||||
@@ -844,18 +860,11 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
|
||||
|
||||
// Lambda to check if a pointer was newly allocated
|
||||
auto WasNewlyAllocated = [&](void *Ptr, const char *PtrName) {
|
||||
bool IsNewlyAllocated =
|
||||
llvm::any_of(AttachInfo.NewAllocations, [&](const auto &Alloc) {
|
||||
void *AllocPtr = Alloc.first;
|
||||
int64_t AllocSize = Alloc.second;
|
||||
return Ptr >= AllocPtr &&
|
||||
Ptr < reinterpret_cast<void *>(
|
||||
reinterpret_cast<char *>(AllocPtr) + AllocSize);
|
||||
});
|
||||
bool WasNewlyAllocated = StateInfo.wasNewlyAllocated(Ptr).has_value();
|
||||
ODBG(ODT_Mapping) << "Attach " << PtrName << " " << Ptr
|
||||
<< " was newly allocated: "
|
||||
<< (IsNewlyAllocated ? "yes" : "no");
|
||||
return IsNewlyAllocated;
|
||||
<< (WasNewlyAllocated ? "yes" : "no");
|
||||
return WasNewlyAllocated;
|
||||
};
|
||||
|
||||
// Only process ATTACH if either the pointee or the pointer was newly
|
||||
@@ -1065,7 +1074,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgBases, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
AttachInfoTy *AttachInfo, bool FromMapper) {
|
||||
StateInfoTy *StateInfo, bool FromMapper) {
|
||||
assert(StateInfo && "StateInfo is required for targetDataEnd for handling "
|
||||
"FROM data transfers");
|
||||
int Ret = OFFLOAD_SUCCESS;
|
||||
auto *PostProcessingPtrs = new SmallVector<PostProcessingInfo>();
|
||||
// process each input.
|
||||
@@ -1094,7 +1105,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
|
||||
Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
|
||||
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
|
||||
targetDataEnd);
|
||||
targetDataEnd, StateInfo);
|
||||
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Call to targetDataEnd via targetDataMapper for custom "
|
||||
@@ -1162,26 +1173,65 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
if (!TPR.isPresent())
|
||||
continue;
|
||||
|
||||
// Track entries whose ref-count went to zero (IsLast=true) so that we
|
||||
// can honor any subsequently encountered FROM entries that fall within
|
||||
// their range.
|
||||
if (TPR.Flags.IsLast) {
|
||||
// For assumed-size arrays like map(delete: p[:]), the compiler provides
|
||||
// no size information, so we need to get the actual allocated extent from
|
||||
// the HDTT entry.
|
||||
void *ReleasedHstPtrBegin =
|
||||
reinterpret_cast<void *>(TPR.getEntry()->HstPtrBegin);
|
||||
int64_t ReleasedSize =
|
||||
TPR.getEntry()->HstPtrEnd - TPR.getEntry()->HstPtrBegin;
|
||||
ODBG(ODT_Mapping) << "Tracking released entry: HstPtr="
|
||||
<< ReleasedHstPtrBegin << ", Size=" << ReleasedSize
|
||||
<< ", ForceDelete=" << ForceDelete;
|
||||
StateInfo->ReleasedEntries[ReleasedHstPtrBegin] = ReleasedSize;
|
||||
}
|
||||
|
||||
// Move data back to the host
|
||||
const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
|
||||
const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
|
||||
if (HasFrom && (HasAlways || TPR.Flags.IsLast) &&
|
||||
!TPR.Flags.IsHostPointer && DataSize != 0) {
|
||||
ODBG(ODT_Mapping) << "Moving " << DataSize
|
||||
<< " bytes (tgt:" << TgtPtrBegin
|
||||
<< ") -> (hst:" << HstPtrBegin << ")";
|
||||
|
||||
// Lambda to perform the actual FROM data retrieval from device to host
|
||||
auto PerformFromRetrieval = [&](void *HstPtr, void *TgtPtr, int64_t Size,
|
||||
HostDataToTargetTy *Entry) -> int {
|
||||
// Check if this FROM transfer can be skipped.
|
||||
//
|
||||
// This is an optimization that may help in rare cases when we have
|
||||
// multiple overlapping FROM entries. e.g.
|
||||
//
|
||||
// ... map(always, from: x) map(always, from: x)
|
||||
// ... map(delete: x) map(from: x) map(from: x)
|
||||
//
|
||||
// If we think the overhead makes it not worh it, we can remove it.
|
||||
if (auto TransferredEntry = StateInfo->wasTransferredFrom(HstPtr, Size)) {
|
||||
void *TransferredPtr = TransferredEntry->first;
|
||||
int64_t TransferredSize = TransferredEntry->second;
|
||||
ODBG(ODT_Mapping) << "FROM entry HstPtr=" << HstPtr << " size=" << Size
|
||||
<< " already transferred within [" << TransferredPtr
|
||||
<< ", "
|
||||
<< static_cast<void *>(
|
||||
static_cast<char *>(TransferredPtr) +
|
||||
TransferredSize)
|
||||
<< ")";
|
||||
return OFFLOAD_SUCCESS;
|
||||
}
|
||||
|
||||
ODBG(ODT_Mapping) << "Moving " << Size << " bytes (tgt:" << TgtPtr
|
||||
<< ") -> (hst:" << HstPtr << ")";
|
||||
TIMESCOPE_WITH_DETAILS_AND_IDENT(
|
||||
"DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc);
|
||||
"DevToHost", "Size=" + std::to_string(Size) + "B", Loc);
|
||||
// Wait for any previous transfer if an event is present.
|
||||
if (void *Event = TPR.getEntry()->getEvent()) {
|
||||
if (void *Event = Entry->getEvent()) {
|
||||
if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Failed to wait for event " << Event << ".";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
}
|
||||
|
||||
Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
|
||||
TPR.getEntry());
|
||||
int Ret = Device.retrieveData(HstPtr, TgtPtr, Size, AsyncInfo, Entry);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Copying data from device failed.";
|
||||
return OFFLOAD_FAIL;
|
||||
@@ -1193,10 +1243,128 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
// copy-back was issued but before it completed. Since the reuse might
|
||||
// also copy-back a value we would race.
|
||||
if (TPR.Flags.IsLast) {
|
||||
if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
|
||||
OFFLOAD_SUCCESS)
|
||||
if (Entry->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
// Track this transfer to avoid duplicate transfers later on.
|
||||
StateInfo->addTransferredFromEntry(HstPtr, Size);
|
||||
|
||||
return OFFLOAD_SUCCESS;
|
||||
};
|
||||
|
||||
// Lambda to check if this pointer was previously released.
|
||||
//
|
||||
// This is needed to handle cases like the following:
|
||||
// p1 = p2 = &x;
|
||||
// ... map(delete: p1[:]) map(from: p2[0:1])
|
||||
// The ref-count becomes zero before encountering the FROM entry, but we
|
||||
// still need to do a transfer, if it went from non-zero to zero.
|
||||
//
|
||||
// OpenMP 6.0, sec. 7.9.6 "map Clause", p. 284 L24-26:
|
||||
// If the reference count of the corresponding list item is one or if
|
||||
// the always-modifier or delete-modifier is specified, and if the map
|
||||
// type is from, the original list item is updated as if the list item
|
||||
// appeared in a from clause on a target_update directive.
|
||||
auto WasPreviouslyReleased = [&]() -> bool {
|
||||
auto ReleasedEntry = StateInfo->wasPreviouslyReleased(HstPtrBegin);
|
||||
if (!ReleasedEntry)
|
||||
return false;
|
||||
|
||||
void *ReleasedPtr = ReleasedEntry->first;
|
||||
int64_t ReleasedSize = ReleasedEntry->second;
|
||||
ODBG(ODT_Mapping) << "Pointer HstPtr=" << HstPtrBegin
|
||||
<< " falls within a range previously released ["
|
||||
<< ReleasedPtr << ", "
|
||||
<< static_cast<void *>(
|
||||
static_cast<char *>(ReleasedPtr) + ReleasedSize)
|
||||
<< ") with size=" << ReleasedSize;
|
||||
return true;
|
||||
};
|
||||
|
||||
bool IsMapFromOnNonHostNonZeroData =
|
||||
HasFrom && !TPR.Flags.IsHostPointer && DataSize != 0;
|
||||
|
||||
auto IsLastOrHasAlwaysOrWasReleased = [&]() {
|
||||
return TPR.Flags.IsLast || HasAlways || WasPreviouslyReleased();
|
||||
};
|
||||
|
||||
if (IsMapFromOnNonHostNonZeroData && IsLastOrHasAlwaysOrWasReleased()) {
|
||||
Ret = PerformFromRetrieval(HstPtrBegin, TgtPtrBegin, DataSize,
|
||||
TPR.getEntry());
|
||||
if (Ret != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
} else if (IsMapFromOnNonHostNonZeroData) {
|
||||
// We can have cases like the following:
|
||||
// p1 = p2 = &x;
|
||||
// ... map(storage: p1[:]) map(from: p2[1:1])
|
||||
//
|
||||
// where it's possible that when the FROM entry is processed, the
|
||||
// ref count is not zero, so no data transfer happens for it. But
|
||||
// the ref-count can go down to zero once all maps have been processed
|
||||
// for the current construct, in which case a transfer should happen.
|
||||
//
|
||||
// So, we keep track of any skipped FROM data-transfers, in case
|
||||
// the ref-count goes down to zero later on.
|
||||
//
|
||||
// This cannot be handled in the compiler for all cases because the
|
||||
// list-items may look very different, as shown in the example above,
|
||||
// which is allowed with OpenMP 6.0:
|
||||
//
|
||||
// OpenMP 6.0, sec. 7.9.6 "map Clause", p. 286 L18-21:
|
||||
// Two list items of the map clauses on the same construct must not share
|
||||
// original storage unless one of the following is true: they are the same
|
||||
// list item, one is the containing structure of the other, at least one
|
||||
// is an assumed-size array, or at least one is implicitly mapped due to
|
||||
// the list item also appearing in a use_device_addr clause.
|
||||
StateInfo->addSkippedFromEntry(HstPtrBegin, DataSize);
|
||||
ODBG(ODT_Mapping) << "Skipping FROM map transfer for HstPtr="
|
||||
<< HstPtrBegin << " size=" << DataSize
|
||||
<< " (IsLast=" << TPR.Flags.IsLast << ", TotalRefCount="
|
||||
<< TPR.getEntry()->getTotalRefCount() << ")";
|
||||
}
|
||||
|
||||
// If the ref-count went to zero (IsLast=true), check if any previously
|
||||
// skipped FROM entries fall within this released entry's range.
|
||||
if (TPR.Flags.IsLast && !StateInfo->SkippedFromEntries.empty()) {
|
||||
uintptr_t ReleasedBeginPtrInt = TPR.getEntry()->HstPtrBegin;
|
||||
uintptr_t ReleasedEndPtrInt = TPR.getEntry()->HstPtrEnd;
|
||||
SmallVector<void *, 32> ToRemove;
|
||||
|
||||
for (auto &SkippedFromEntry : StateInfo->SkippedFromEntries) {
|
||||
void *FromBeginPtr = SkippedFromEntry.first;
|
||||
int64_t FromDataSize = SkippedFromEntry.second;
|
||||
uintptr_t FromBeginPtrInt = reinterpret_cast<uintptr_t>(FromBeginPtr);
|
||||
|
||||
// Check if this skipped FROM entry's starting pointer falls within this
|
||||
// released entry
|
||||
if (FromBeginPtrInt >= ReleasedBeginPtrInt &&
|
||||
FromBeginPtrInt < ReleasedEndPtrInt) {
|
||||
ODBG(ODT_Mapping)
|
||||
<< "Found skipped FROM entry: HstPtr=" << FromBeginPtr
|
||||
<< " size=" << FromDataSize << " within region being released ["
|
||||
<< reinterpret_cast<void *>(ReleasedBeginPtrInt) << ", "
|
||||
<< reinterpret_cast<void *>(ReleasedEndPtrInt) << ")";
|
||||
|
||||
// Calculate offset within the target pointer
|
||||
int64_t Offset = FromBeginPtrInt - ReleasedBeginPtrInt;
|
||||
void *FromTgtBeginPtr =
|
||||
static_cast<void *>(static_cast<char *>(TgtPtrBegin) + Offset);
|
||||
|
||||
// Perform the retrieval for this skipped entry
|
||||
int Ret = PerformFromRetrieval(
|
||||
reinterpret_cast<void *>(FromBeginPtrInt), FromTgtBeginPtr,
|
||||
FromDataSize, TPR.getEntry());
|
||||
if (Ret != OFFLOAD_SUCCESS)
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
ToRemove.push_back(FromBeginPtr);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove processed entries
|
||||
for (void *Ptr : ToRemove)
|
||||
StateInfo->SkippedFromEntries.erase(Ptr);
|
||||
}
|
||||
|
||||
// Add pointer to the buffer for post-synchronize processing.
|
||||
@@ -1377,7 +1545,7 @@ int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
|
||||
void **ArgsBase, void **Args, int64_t *ArgSizes,
|
||||
int64_t *ArgTypes, map_var_info_t *ArgNames,
|
||||
void **ArgMappers, AsyncInfoTy &AsyncInfo,
|
||||
AttachInfoTy *AttachInfo, bool FromMapper) {
|
||||
StateInfoTy *StateInfo, bool FromMapper) {
|
||||
// process each input.
|
||||
for (int32_t I = 0; I < ArgNum; ++I) {
|
||||
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
|
||||
@@ -1872,21 +2040,21 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
if (!DeviceOrErr)
|
||||
FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
|
||||
|
||||
// Create AttachInfo for tracking any ATTACH entries, or new-allocations
|
||||
// Create StateInfo for tracking any ATTACH entries, new allocations,
|
||||
// when handling the "begin" mapping for a target constructs.
|
||||
AttachInfoTy AttachInfo;
|
||||
StateInfoTy StateInfo;
|
||||
|
||||
int Ret = targetDataBegin(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
|
||||
&AttachInfo, false /*FromMapper=*/);
|
||||
&StateInfo, false /*FromMapper=*/);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Call to targetDataBegin failed, abort target.";
|
||||
return OFFLOAD_FAIL;
|
||||
}
|
||||
|
||||
// Process collected ATTACH entries
|
||||
if (!AttachInfo.AttachEntries.empty()) {
|
||||
Ret = processAttachEntries(*DeviceOrErr, AttachInfo, AsyncInfo);
|
||||
if (!StateInfo.AttachEntries.empty()) {
|
||||
Ret = processAttachEntries(*DeviceOrErr, StateInfo, AsyncInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Failed to process ATTACH entries.";
|
||||
return OFFLOAD_FAIL;
|
||||
@@ -2053,9 +2221,14 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
|
||||
if (!DeviceOrErr)
|
||||
FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
|
||||
|
||||
// Create StateInfo for tracking map(from)s for which ref-count is non-zero
|
||||
// when the entry is encountered.
|
||||
StateInfoTy StateInfo;
|
||||
|
||||
// Move data from device.
|
||||
int Ret = targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
|
||||
int Ret =
|
||||
targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
|
||||
ArgTypes, ArgNames, ArgMappers, AsyncInfo, &StateInfo);
|
||||
if (Ret != OFFLOAD_SUCCESS) {
|
||||
REPORT() << "Call to targetDataEnd failed, abort target.";
|
||||
return OFFLOAD_FAIL;
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
// RUN: %libomptarget-compile-generic
|
||||
// RUN: %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=CHECK
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
// XFAIL: intelgpu
|
||||
|
||||
// Since the allocation of the pointee happens on the "target" construct (1),
|
||||
// the "to" transfer requested as part of the mapper (2) should also happen.
|
||||
//
|
||||
// Similarly, the "from" transfer should also happen at the end of the target
|
||||
// construct, even if the ref-count of the pointee x has not gone down to 0
|
||||
// when "from" is encountered.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct {
|
||||
int *p;
|
||||
int *q;
|
||||
} S;
|
||||
#pragma omp declare mapper(my_mapper : S s) map(alloc : s.p, s.p[0 : 10]) \
|
||||
map(from : s.p[0 : 10]) map(to : s.p[0 : 10]) \
|
||||
map(alloc : s.p[0 : 10]) // (2)
|
||||
|
||||
S s1;
|
||||
int main() {
|
||||
int x[10];
|
||||
x[1] = 111;
|
||||
s1.q = s1.p = &x[0];
|
||||
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> HstPtrBegin 0x[[#%x,HOST_ADDRX:]] was newly allocated for the current region
|
||||
// DEBUG: omptarget --> Moving [[#%u,SIZEX:]] bytes (hst:0x{{0*}}[[#HOST_ADDRX]]) -> (tgt:0x{{.*}})
|
||||
// clang-format on
|
||||
#pragma omp target map(alloc : s1.p[0 : 10]) \
|
||||
map(mapper(my_mapper), tofrom : s1) // (1)
|
||||
{
|
||||
printf("In tgt: %d\n", s1.p[1]); // CHECK-DAG: In tgt: 111
|
||||
s1.p[1] = s1.p[1] + 111;
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> Found skipped FROM entry: HstPtr=0x{{0*}}[[#HOST_ADDRX]] size=[[#SIZEX]] within region being released
|
||||
// DEBUG: omptarget --> Moving [[#SIZEX]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDRX]])
|
||||
// clang-format on
|
||||
printf("After tgt: %d\n", s1.p[1]); // CHECK-DAG: After tgt: 222
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
// RUN: %libomptarget-compile-generic -fopenmp-version=60
|
||||
// RUN: %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=CHECK
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
|
||||
// The "present" check should pass on the "target" construct (2),
|
||||
// and there should be no "to" transfer, because the pointee "x" is already
|
||||
// present (because of (1)).
|
||||
// However, there should be a "from" transfer at the end of (2) because of the
|
||||
// "delete" on the mapper.
|
||||
|
||||
// FIXME: This currently fails, but should start passing once ATTACH-style maps
|
||||
// are enabled for mappers (#166874).
|
||||
// UNSUPPORTED: true
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct {
|
||||
int *p;
|
||||
int *q;
|
||||
} S;
|
||||
#pragma omp declare mapper(my_mapper : S s) map(alloc : s.p) \
|
||||
map(alloc, present : s.p[0 : 10]) map(delete : s.q[ : ]) \
|
||||
map(from : s.p[0 : 10]) map(to : s.p[0 : 10]) map(alloc : s.p[0 : 10])
|
||||
|
||||
S s1;
|
||||
int main() {
|
||||
int x[10];
|
||||
x[1] = 111;
|
||||
s1.q = s1.p = &x[0];
|
||||
|
||||
#pragma omp target data map(alloc : x) // (1)
|
||||
{
|
||||
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes (hst:0x{{.*}}) -> (tgt:0x{{.*}})
|
||||
#pragma omp target map(mapper(my_mapper), tofrom : s1) // (2)
|
||||
{
|
||||
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
|
||||
printf("In tgt: %d\n", s1.p[1]); // CHECK-NOT: In tgt: 111
|
||||
s1.p[1] = 222;
|
||||
}
|
||||
printf("After tgt: %d\n", s1.p[1]); // CHECK: After tgt: 222
|
||||
}
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> Found skipped FROM entry: HstPtr=0x[[#%x,HOST_ADDR:]] size=[[#%u,SIZE:]] within region being released
|
||||
// DEBUG: omptarget --> Moving [[#SIZE]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
|
||||
// clang-format on
|
||||
}
|
||||
26
offload/test/mapping/map_ordering_tgt_alloc_from_to.c
Normal file
26
offload/test/mapping/map_ordering_tgt_alloc_from_to.c
Normal file
@@ -0,0 +1,26 @@
|
||||
// RUN: %libomptarget-compile-generic
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK
|
||||
// REQUIRES: libomptarget-debug
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Even if the "alloc" and "from" are encountered before the "to",
|
||||
// there should be a data-transfer from host to device, as the
|
||||
// ref-count goes from 0 to 1 at the entry of the target region.
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> HstPtrBegin 0x[[#%x,HOST_ADDR:]] was newly allocated for the current region
|
||||
// DEBUG: omptarget --> Moving {{.*}} bytes (hst:0x{{0*}}[[#HOST_ADDR]]) -> (tgt:0x{{.*}})
|
||||
// clang-format on
|
||||
#pragma omp target map(alloc : x) map(from : x) map(to : x) map(alloc : x)
|
||||
{
|
||||
printf("%d\n", x); // CHECK: 111
|
||||
x = x + 111;
|
||||
}
|
||||
|
||||
printf("%d\n", x); // CHECK: 222
|
||||
}
|
||||
25
offload/test/mapping/map_ordering_tgt_alloc_present_tofrom.c
Normal file
25
offload/test/mapping/map_ordering_tgt_alloc_present_tofrom.c
Normal file
@@ -0,0 +1,25 @@
|
||||
// RUN: %libomptarget-compile-generic
|
||||
// RUN: %libomptarget-run-fail-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
// CHECK: addr=0x[[#%x,HOST_ADDR:]], size=[[#%u,SIZE:]]
|
||||
int x = 111;
|
||||
fprintf(stderr, "addr=%p, size=%ld\n", &x, sizeof(x));
|
||||
|
||||
// clang-format off
|
||||
// CHECK: omptarget message: device mapping required by 'present' map type modifier does not exist for host address 0x{{0*}}[[#HOST_ADDR]] ([[#SIZE]] bytes)
|
||||
// CHECK: omptarget error: Pointer 0x{{0*}}[[#HOST_ADDR]] was not present on the device upon entry to the region.
|
||||
// CHECK: omptarget error: Call to targetDataBegin failed, abort target.
|
||||
// CHECK: omptarget error: Failed to process data before launching the kernel.
|
||||
// CHECK: omptarget fatal error 1: failure of target construct while offloading is mandatory
|
||||
// clang-format on
|
||||
#pragma omp target map(alloc : x) map(present, alloc : x) map(tofrom : x)
|
||||
{
|
||||
printf("%d\n", x);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
15
offload/test/mapping/map_ordering_tgt_alloc_tofrom.c
Normal file
15
offload/test/mapping/map_ordering_tgt_alloc_tofrom.c
Normal file
@@ -0,0 +1,15 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
#pragma omp target map(alloc : x) map(tofrom : x) map(alloc : x)
|
||||
{
|
||||
printf("In tgt: %d\n", x); // CHECK-DAG: In tgt: 111
|
||||
x = x + 111;
|
||||
}
|
||||
|
||||
printf("After tgt: %d\n", x); // CHECK-DAG: After tgt: 222
|
||||
}
|
||||
15
offload/test/mapping/map_ordering_tgt_data_alloc_from.c
Normal file
15
offload/test/mapping/map_ordering_tgt_data_alloc_from.c
Normal file
@@ -0,0 +1,15 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
#pragma omp target data map(alloc : x) map(from : x) map(alloc : x)
|
||||
{
|
||||
#pragma omp target map(present, alloc : x)
|
||||
x = 222;
|
||||
}
|
||||
|
||||
printf("After tgt data: %d\n", x); // CHECK: After tgt data: 222
|
||||
}
|
||||
18
offload/test/mapping/map_ordering_tgt_data_alloc_to_from.c
Normal file
18
offload/test/mapping/map_ordering_tgt_data_alloc_to_from.c
Normal file
@@ -0,0 +1,18 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
#pragma omp target data map(alloc : x) map(to : x) map(from : x) map(alloc : x)
|
||||
{
|
||||
#pragma omp target map(present, alloc : x)
|
||||
{
|
||||
printf("%d\n", x); // CHECK: 111
|
||||
x = x + 111;
|
||||
}
|
||||
}
|
||||
|
||||
printf("%d\n", x); // CHECK: 222
|
||||
}
|
||||
18
offload/test/mapping/map_ordering_tgt_data_alloc_tofrom.c
Normal file
18
offload/test/mapping/map_ordering_tgt_data_alloc_tofrom.c
Normal file
@@ -0,0 +1,18 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
#pragma omp target data map(alloc : x) map(tofrom : x) map(alloc : x)
|
||||
{
|
||||
#pragma omp target map(present, alloc : x)
|
||||
{
|
||||
printf("%d\n", x); // CHECK: 111
|
||||
x = x + 111;
|
||||
}
|
||||
}
|
||||
|
||||
printf("%d\n", x); // CHECK: 222
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
// RUN: %libomptarget-compile-generic
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK
|
||||
// REQUIRES: libomptarget-debug
|
||||
// XFAIL: intelgpu
|
||||
|
||||
// There should only be one "from" data-transfer, despite the two duplicate
|
||||
// maps.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
#pragma omp target data map(alloc : x)
|
||||
{
|
||||
#pragma omp target enter data map(alloc : x) map(to : x)
|
||||
#pragma omp target map(present, alloc : x)
|
||||
{
|
||||
printf("In tgt: %d\n", x); // CHECK-NOT: In tgt: 111
|
||||
x = 222;
|
||||
}
|
||||
#pragma omp target exit data map(always, from : x) map(always, from : x)
|
||||
// DEBUG: omptarget --> Moving {{.*}} bytes (tgt:0x{{.*}}) -> (hst:0x{{.*}})
|
||||
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes
|
||||
}
|
||||
|
||||
printf("After tgt exit data: %d\n", x); // CHECK: After tgt exit data: 222
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
// RUN: %libomptarget-compile-run-and-check-generic
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x = 111;
|
||||
#pragma omp target data map(alloc : x)
|
||||
{
|
||||
#pragma omp target enter data map(alloc : x) map(to : x)
|
||||
#pragma omp target map(present, alloc : x)
|
||||
{
|
||||
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
|
||||
printf("In tgt: %d\n", x); // CHECK-NOT: In tgt: 111
|
||||
x = 222;
|
||||
}
|
||||
#pragma omp target exit data map(delete : x) map(from : x) map(delete : x)
|
||||
printf("After tgt exit data: %d\n", x); // CHECK: After tgt exit data: 222
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// RUN: %libomptarget-compile-generic -fopenmp-version=60
|
||||
// RUN: %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=CHECK
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
// XFAIL: intelgpu
|
||||
|
||||
// The from on target_exit_data should result in a data-transfer of 4 bytes,
|
||||
// even if when "from" is honored, the ref-count hasn't gone down to 0.
|
||||
// It will eventually go down to 0 as part of the same exit_data due to the
|
||||
// "delete" on it.
|
||||
// This is a case that cannot be handled at compile time because the list-items
|
||||
// are not related.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x[10];
|
||||
int *p1x, *p2x;
|
||||
p1x = p2x = &x[0];
|
||||
|
||||
#pragma omp target data map(alloc : x)
|
||||
{
|
||||
#pragma omp target enter data map(alloc : x) map(to : x)
|
||||
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes (hst:0x{{.*}}) -> (tgt:0x{{.*}})
|
||||
#pragma omp target map(present, alloc : x)
|
||||
{
|
||||
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
|
||||
printf("In tgt: %d\n", x[1]); // CHECK-NOT: In tgt: 111
|
||||
x[1] = 222;
|
||||
}
|
||||
|
||||
#pragma omp target exit data map(delete : p1x[ : ]) map(from : p2x[1])
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> Found skipped FROM entry: HstPtr=0x[[#%x,HOST_ADDR:]] size=[[#%u,SIZE:]] within region being released
|
||||
// DEBUG: omptarget --> Moving [[#SIZE]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
|
||||
// clang-format on
|
||||
|
||||
// CHECK: After tgt exit data: 222
|
||||
printf("After tgt exit data: %d\n", x[1]);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
// RUN: %libomptarget-compile-generic -fopenmp-version=60
|
||||
// RUN: %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=CHECK
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
// XFAIL: intelgpu
|
||||
|
||||
// The from on target_exit_data should result in a data-transfer of 4 bytes,
|
||||
// even if when "delete" is honored first, and by the time "from" is
|
||||
// encountered, the ref-count had already been 0 (i.e. it's not transitioning
|
||||
// from non-zero to zero).
|
||||
// This is a case that cannot be handled at compile time because the list-items
|
||||
// are not related.
|
||||
|
||||
#include <stdio.h>
|
||||
int main() {
|
||||
int x[10];
|
||||
int *p1x, *p2x;
|
||||
p1x = p2x = &x[1];
|
||||
x[1] = 111;
|
||||
|
||||
#pragma omp target data map(alloc : x)
|
||||
{
|
||||
#pragma omp target enter data map(alloc : x) map(to : x)
|
||||
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes (hst:0x{{.*}}) -> (tgt:0x{{.*}})
|
||||
#pragma omp target map(present, alloc : x)
|
||||
{
|
||||
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
|
||||
printf("In tgt: %d\n", x[1]); // CHECK-NOT: In tgt: 111
|
||||
x[1] = 222;
|
||||
}
|
||||
|
||||
#pragma omp target exit data map(from : p2x[0]) map(delete : p1x[ : ])
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> Pointer HstPtr=0x[[#%x,HOST_ADDR:]] falls within a range previously released
|
||||
// DEBUG: omptarget --> Moving {{.*}} bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
|
||||
// clang-format on
|
||||
|
||||
// CHECK: After tgt exit data: 222
|
||||
printf("After tgt exit data: %d\n", x[1]);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
// RUN: %libomptarget-compile-generic
|
||||
// RUN: %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=CHECK
|
||||
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
|
||||
// RUN: | %fcheck-generic -check-prefix=DEBUG
|
||||
// REQUIRES: libomptarget-debug
|
||||
// XFAIL: intelgpu
|
||||
|
||||
// The test ensures that the FROM transfer for the full "s1" is performed, and
|
||||
// not just the FROM done via the mapper of s1.s2.
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct {
|
||||
int a;
|
||||
int b;
|
||||
} S2;
|
||||
|
||||
#pragma omp declare mapper(my_mapper : S2 s2) map(tofrom : s2.a)
|
||||
|
||||
typedef struct {
|
||||
S2 s2;
|
||||
int c;
|
||||
int d;
|
||||
} S1;
|
||||
|
||||
S1 s1;
|
||||
|
||||
int main() {
|
||||
#pragma omp target enter data map(alloc : s1)
|
||||
|
||||
#pragma omp target map(present, alloc : s1)
|
||||
{
|
||||
s1.s2.a = 111;
|
||||
s1.s2.b = 222;
|
||||
s1.c = 333;
|
||||
s1.d = 444;
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
// DEBUG: omptarget --> Tracking released entry: HstPtr=0x[[#%x,HOST_ADDR:]], Size=[[#%u,SIZE:]], ForceDelete=0
|
||||
// DEBUG: omptarget --> Moving {{.*}} bytes (tgt:0x{{.*}}) -> (hst:0x{{.*}})
|
||||
// DEBUG: omptarget --> Pointer HstPtr=0x{{0*}}[[#HOST_ADDR]] falls within a range previously released
|
||||
// DEBUG: omptarget --> Moving [[#SIZE]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
|
||||
// clang-format on
|
||||
#pragma omp target exit data map(from : s1) map(mapper(my_mapper), from : s1.s2)
|
||||
|
||||
// CHECK: 111 222 333 444
|
||||
printf("%d %d %d %d\n", s1.s2.a, s1.s2.b, s1.c, s1.d);
|
||||
}
|
||||
Reference in New Issue
Block a user