Reland "[OpenMP][Offload] Handle present/to/from when a different entry did alloc/delete." (#184260)

Some tests that were checking for prints inside/outside `target` regions
needed to be updated to work on systems where the ordering wasn't
deterministic.

Reverts llvm/llvm-project#184240
    
Original description from #165494:

-----

OpenMP allows cases like the following:

```c
  int *p1, *p2, x;
  p1 = p2 = &x;
  ...
  #pragma omp target_exit_data map(delete: p1[:]) from(p2[0])
```

Which means, when the runtime encounters the `from` entry, the ref-count
may
not be zero, but it will go down to zero at the end of the current
construct,
which should cause the "from" transfer to happen.

Similarly, a user may have:

```c
  struct S {
    int *p;
  };

  #pragma omp declare_mapper (id1: S s) map(s.p) map(present, alloc: s.p[0:10])
  #pragma omp declare_mapper (id2: S s) map(s.p, s.p[0:10])

  S s1;

 // present-check should fail here
 #pragma omp target_enter_data map(alloc: s.p[0:10]) map(mapper(id1), to: s)
 // "to" should be honored here
 #pragma omp target_enter_data map(alloc: s.p[0:10]) map(mapper(id2), to: s)
```

Where the allocation happens before the "to" entry is encountered by the
runtime. Or, an allocation happens before a "present" entry is
encountered.

To handle cases like this, we need to use the state information of
previously
seen new allocations, deletions, "from" entries, when honoring
`to`/`from`/`present` map entries.

-----
This commit is contained in:
Abhinav Gaba
2026-03-02 15:32:30 -08:00
committed by GitHub
parent d4d18248fd
commit 1d1c83ad73
17 changed files with 765 additions and 76 deletions

View File

@@ -495,20 +495,110 @@ struct AttachMapInfo {
MapType(Type), Pointername(Name) {}
};
/// Structure to track ATTACH entries and new allocations across recursive calls
/// (for handling mappers) to targetDataBegin for a given construct.
struct AttachInfoTy {
/// ATTACH map entries for deferred processing.
/// Structure to track new allocations, ATTACH entries, DELETE entries and
/// skipped FROM data transfer information for a given construct, across
/// recursive calls (for handling mappers) to targetDataBegin/targetDataEnd.
struct StateInfoTy {
/// ATTACH map entries for deferred processing until all other maps are done.
llvm::SmallVector<AttachMapInfo> AttachEntries;
/// Host pointers for which new memory was allocated.
/// Key: host pointer, Value: allocation size.
llvm::DenseMap<void *, int64_t> NewAllocations;
AttachInfoTy() = default;
/// Host pointers that had a FROM entry, but for which a data transfer was
/// skipped due to the ref-count not being zero.
/// Key: host pointer, Value: data size.
llvm::DenseMap<void *, int64_t> SkippedFromEntries;
/// Host pointers for which we have triggered a FROM transfer at some point
/// during targetDataEnd. It's used to avoid duplicate transfers.
/// Key: host pointer, Value: transferred size.
llvm::DenseMap<void *, int64_t> TransferredFromEntries;
/// Starting host address and size of entries whose ref-count went to zero.
/// This includes entries released through explicit DELETE, or normal
/// ref-count decrements. It's used to ensure transfers are performed for FROM
/// entries whose ref-count is already zero when the entry is encountered.
/// Key: host pointer, Value: size.
llvm::DenseMap<void *, int64_t> ReleasedEntries;
StateInfoTy() = default;
// Delete copy constructor and copy assignment operator to prevent copying
AttachInfoTy(const AttachInfoTy &) = delete;
AttachInfoTy &operator=(const AttachInfoTy &) = delete;
StateInfoTy(const StateInfoTy &) = delete;
StateInfoTy &operator=(const StateInfoTy &) = delete;
private:
/// Helper to find an entry in \p EntryMap that contains the pointer.
/// Returns the matching entry if found, otherwise std::nullopt.
std::optional<std::pair<void *, int64_t>>
findEntryForPtr(void *Ptr,
const llvm::DenseMap<void *, int64_t> &EntryMap) const {
for (const auto &Entry : EntryMap) {
void *EntryBegin = Entry.first;
int64_t EntrySize = Entry.second;
if (Ptr >= EntryBegin &&
Ptr < static_cast<void *>(static_cast<char *>(EntryBegin) +
EntrySize)) {
return Entry;
}
}
return std::nullopt;
}
public:
/// Check if a pointer falls within any of the newly allocated ranges.
/// Returns the matching entry if found, otherwise std::nullopt.
std::optional<std::pair<void *, int64_t>> wasNewlyAllocated(void *Ptr) const {
return findEntryForPtr(Ptr, NewAllocations);
}
/// Check if a pointer range [Ptr, Ptr+Size) is fully contained within any
/// previously completed FROM transfer.
/// Returns the matching entry if found, otherwise std::nullopt.
std::optional<std::pair<void *, int64_t>>
wasTransferredFrom(void *Ptr, int64_t Size) const {
uintptr_t CheckBegin = reinterpret_cast<uintptr_t>(Ptr);
uintptr_t CheckEnd = CheckBegin + Size;
for (const auto &Entry : TransferredFromEntries) {
void *RangePtr = Entry.first;
int64_t RangeSize = Entry.second;
uintptr_t RangeBegin = reinterpret_cast<uintptr_t>(RangePtr);
uintptr_t RangeEnd = RangeBegin + RangeSize;
if (CheckBegin >= RangeBegin && CheckEnd <= RangeEnd) {
return Entry;
}
}
return std::nullopt;
}
/// Check if a pointer falls within any released entry's range.
/// Returns the matching entry if found, otherwise std::nullopt.
std::optional<std::pair<void *, int64_t>>
wasPreviouslyReleased(void *Ptr) const {
return findEntryForPtr(Ptr, ReleasedEntries);
}
/// Add a skipped FROM entry. Only updates the entry if this is a new pointer
/// or if the new size is larger than the existing entry.
void addSkippedFromEntry(void *Ptr, int64_t Size) {
auto It = SkippedFromEntries.find(Ptr);
if (It == SkippedFromEntries.end() || Size > It->second) {
SkippedFromEntries[Ptr] = Size;
}
}
/// Add a transferred FROM entry. Only updates the entry if this is a new
/// pointer or if the new size is larger than the existing entry.
void addTransferredFromEntry(void *Ptr, int64_t Size) {
auto It = TransferredFromEntries.find(Ptr);
if (It == TransferredFromEntries.end() || Size > It->second) {
TransferredFromEntries[Ptr] = Size;
}
}
};
// Function pointer type for targetData* functions (targetDataBegin,
@@ -516,7 +606,7 @@ struct AttachInfoTy {
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
void **, int64_t *, int64_t *,
map_var_info_t *, void **, AsyncInfoTy &,
AttachInfoTy *, bool);
StateInfoTy *, bool);
void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device,
bool toStdOut = false);
@@ -525,24 +615,22 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo = nullptr,
bool FromMapper = false);
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);
int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false);
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);
int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo = nullptr,
bool FromMapper = false);
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);
// Process deferred ATTACH map entries collected during targetDataBegin.
int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo,
AsyncInfoTy &AsyncInfo);
struct MappingInfoTy {
@@ -583,7 +671,7 @@ struct MappingInfoTy {
bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
bool ReleaseHDTTMap = true);
bool ReleaseHDTTMap = true, StateInfoTy *StateInfo = nullptr);
/// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
/// ensures exclusive access to the HDTT map.

View File

@@ -209,7 +209,8 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap,
StateInfoTy *StateInfo) {
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR);
LR.TPR.Flags.IsPresent = true;
@@ -328,10 +329,36 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
if (ReleaseHDTTMap)
HDTTMap.destroy();
// If the target pointer is valid, and we need to transfer data, issue the
// data transfer.
// Lambda to check if this pointer was newly allocated on the current region.
// This is needed to handle cases when the TO entry is encountered after an
// alloc entry for the same pointer. In such cases, the ref-count is already
// non-zero when TO is encountered, but we still need to do a transfer. e.g.
//
// struct S {
// int *p;
// };
// #pragma omp declare mapper(id : S s) map(to: s.p, s.p[0 : 10])
//
// S s1;
// ...
// #pragma omp target map(alloc : s1.p[0 : 10]) map(mapper(id), to : s1)
auto WasNewlyAllocatedForCurrentRegion = [&]() {
if (!StateInfo)
return false;
bool WasNewlyAllocated =
StateInfo->wasNewlyAllocated(HstPtrBegin).has_value();
if (WasNewlyAllocated)
ODBG(ODT_Mapping) << "HstPtrBegin " << HstPtrBegin
<< " was newly allocated for the current region";
return WasNewlyAllocated;
};
// Even if this isn't a new entry, we still need to do a data-transfer if
// the pointer was newly allocated on the current target region.
if (LR.TPR.TargetPointer && !LR.TPR.Flags.IsHostPointer && HasFlagTo &&
(LR.TPR.Flags.IsNewEntry || HasFlagAlways) && Size != 0) {
(LR.TPR.Flags.IsNewEntry || HasFlagAlways ||
WasNewlyAllocatedForCurrentRegion()) &&
Size != 0) {
// If we have something like:
// #pragma omp target map(to: s.myarr[0:10]) map(to: s.myarr[0:10])

View File

@@ -168,19 +168,22 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
int Rc = OFFLOAD_SUCCESS;
// Only allocate AttachInfo for targetDataBegin
std::unique_ptr<AttachInfoTy> AttachInfo;
if (TargetDataFunction == targetDataBegin)
AttachInfo = std::make_unique<AttachInfoTy>();
// Allocate StateInfo for targetDataBegin and targetDataEnd to track
// allocations, pointer attachments and deferred transfers.
// This is not needed for targetDataUpdate.
std::unique_ptr<StateInfoTy> StateInfo;
if (TargetDataFunction == targetDataBegin ||
TargetDataFunction == targetDataEnd)
StateInfo = std::make_unique<StateInfoTy>();
Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
AttachInfo.get(), /*FromMapper=*/false);
StateInfo.get(), /*FromMapper=*/false);
if (Rc == OFFLOAD_SUCCESS) {
// Process deferred ATTACH entries BEFORE synchronization
if (AttachInfo && !AttachInfo->AttachEntries.empty())
Rc = processAttachEntries(*DeviceOrErr, *AttachInfo, AsyncInfo);
if (StateInfo && !StateInfo->AttachEntries.empty())
Rc = processAttachEntries(*DeviceOrErr, *StateInfo, AsyncInfo);
if (Rc == OFFLOAD_SUCCESS)
Rc = AsyncInfo.synchronize();

View File

@@ -298,7 +298,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
void *ArgMapper, AsyncInfoTy &AsyncInfo,
TargetDataFuncPtrTy TargetDataFunction,
AttachInfoTy *AttachInfo = nullptr) {
StateInfoTy *StateInfo = nullptr) {
ODBG(ODT_Interface) << "Calling the mapper function " << ArgMapper;
// The mapper function fills up Components.
@@ -329,7 +329,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
MapperArgsBase.data(), MapperArgs.data(),
MapperArgSizes.data(), MapperArgTypes.data(),
MapperArgNames.data(), /*arg_mappers*/ nullptr,
AsyncInfo, AttachInfo, /*FromMapper=*/true);
AsyncInfo, StateInfo, /*FromMapper=*/true);
return Rc;
}
@@ -512,9 +512,9 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo, bool FromMapper) {
assert(AttachInfo && "AttachInfo must be available for targetDataBegin for "
"handling ATTACH map-types.");
StateInfoTy *StateInfo, bool FromMapper) {
assert(StateInfo && "StateInfo must be available for targetDataBegin for "
"handling ATTACH and TO/TOFROM map-types.");
// process each input.
for (int32_t I = 0; I < ArgNum; ++I) {
// Ignore private variables and arrays - there is no mapping for them.
@@ -533,7 +533,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
targetDataBegin, AttachInfo);
targetDataBegin, StateInfo);
if (Rc != OFFLOAD_SUCCESS) {
REPORT() << "Call to targetDataBegin via targetDataMapper for custom "
@@ -560,7 +560,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// similar to firstprivate (PRIVATE | TO) entries by
// PrivateArgumentManager.
if (!IsCorrespondingPointerInit)
AttachInfo->AttachEntries.emplace_back(
StateInfo->AttachEntries.emplace_back(
/*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
/*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
/*PointeeName=*/HstPtrName);
@@ -637,7 +637,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// Track new allocation, for eventual use in attachment decision-making.
if (PointerTpr.Flags.IsNewEntry && !IsHostPtr)
AttachInfo->NewAllocations[HstPtrBase] = sizeof(void *);
StateInfo->NewAllocations[HstPtrBase] = sizeof(void *);
ODBG(ODT_Mapping) << "There are " << sizeof(void *)
<< " bytes allocated at target address "
@@ -659,7 +659,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
auto TPR = Device.getMappingInfo().getTargetPointer(
HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName,
HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry());
HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry(),
/*ReleaseHDTTMap=*/true, StateInfo);
void *TgtPtrBegin = TPR.TargetPointer;
IsHostPtr = TPR.Flags.IsHostPointer;
// If data_size==0, then the argument could be a zero-length pointer to
@@ -670,11 +671,26 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
: "device failure or illegal mapping")
<< ").";
return OFFLOAD_FAIL;
} else if (TgtPtrBegin && HasPresentModifier &&
StateInfo->wasNewlyAllocated(HstPtrBegin).has_value()) {
// For "PRESENT" entries, we may have cases like the following:
// int *xp = &x[0];
// map(alloc: x[:]) map(present, alloc: xp[1])
// The "PRESENT" entry may be encountered after a previous entry
// allocated new storage for the pointer.
// To catch such cases, we need to look at any existing allocations
// and error out if we have any matching the pointer.
MESSAGE("device mapping required by 'present' map type modifier does not "
"exist for host address " DPxMOD " (%" PRId64 " bytes)\n",
DPxPTR(HstPtrBegin), DataSize);
REPORT() << "Pointer " << HstPtrBegin
<< " was not present on the device upon entry to the region.";
return OFFLOAD_FAIL;
}
// Track new allocation, for eventual use in attachment decision-making.
// Track new allocation, for eventual use in attachment/to decision-making.
if (TPR.Flags.IsNewEntry && !IsHostPtr && TgtPtrBegin)
AttachInfo->NewAllocations[HstPtrBegin] = DataSize;
StateInfo->NewAllocations[HstPtrBegin] = DataSize;
ODBG(ODT_Mapping) << "There are " << DataSize
<< " bytes allocated at target address " << TgtPtrBegin
@@ -794,24 +810,24 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
///
/// For this purpose, we insert a data_fence before the first
/// pointer-attachment, (3), to ensure that all pending transfers finish first.
int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo,
AsyncInfoTy &AsyncInfo) {
// Report all tracked allocations from both main loop and ATTACH processing
if (!AttachInfo.NewAllocations.empty()) {
if (!StateInfo.NewAllocations.empty()) {
ODBG_OS(ODT_Mapping, [&](llvm::raw_ostream &OS) {
OS << "Tracked " << AttachInfo.NewAllocations.size()
OS << "Tracked " << StateInfo.NewAllocations.size()
<< " total new allocations:";
for (const auto &Alloc : AttachInfo.NewAllocations) {
for (const auto &Alloc : StateInfo.NewAllocations) {
OS << " Host ptr: " << Alloc.first << ", Size: " << Alloc.second
<< " bytes";
}
});
}
if (AttachInfo.AttachEntries.empty())
if (StateInfo.AttachEntries.empty())
return OFFLOAD_SUCCESS;
ODBG(ODT_Mapping) << "Processing " << AttachInfo.AttachEntries.size()
ODBG(ODT_Mapping) << "Processing " << StateInfo.AttachEntries.size()
<< " deferred ATTACH map entries";
bool TreatAttachAutoAsAlways = MappingConfig::get().TreatAttachAutoAsAlways;
@@ -821,9 +837,9 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
int Ret = OFFLOAD_SUCCESS;
bool IsFirstPointerAttachment = true;
for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
for (size_t EntryIdx = 0; EntryIdx < StateInfo.AttachEntries.size();
++EntryIdx) {
const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
const auto &AttachEntry = StateInfo.AttachEntries[EntryIdx];
void **HstPtr = reinterpret_cast<void **>(AttachEntry.PointerBase);
@@ -844,18 +860,11 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
// Lambda to check if a pointer was newly allocated
auto WasNewlyAllocated = [&](void *Ptr, const char *PtrName) {
bool IsNewlyAllocated =
llvm::any_of(AttachInfo.NewAllocations, [&](const auto &Alloc) {
void *AllocPtr = Alloc.first;
int64_t AllocSize = Alloc.second;
return Ptr >= AllocPtr &&
Ptr < reinterpret_cast<void *>(
reinterpret_cast<char *>(AllocPtr) + AllocSize);
});
bool WasNewlyAllocated = StateInfo.wasNewlyAllocated(Ptr).has_value();
ODBG(ODT_Mapping) << "Attach " << PtrName << " " << Ptr
<< " was newly allocated: "
<< (IsNewlyAllocated ? "yes" : "no");
return IsNewlyAllocated;
<< (WasNewlyAllocated ? "yes" : "no");
return WasNewlyAllocated;
};
// Only process ATTACH if either the pointee or the pointer was newly
@@ -1065,7 +1074,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo, bool FromMapper) {
StateInfoTy *StateInfo, bool FromMapper) {
assert(StateInfo && "StateInfo is required for targetDataEnd for handling "
"FROM data transfers");
int Ret = OFFLOAD_SUCCESS;
auto *PostProcessingPtrs = new SmallVector<PostProcessingInfo>();
// process each input.
@@ -1094,7 +1105,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I],
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
targetDataEnd);
targetDataEnd, StateInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT() << "Call to targetDataEnd via targetDataMapper for custom "
@@ -1162,26 +1173,65 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
if (!TPR.isPresent())
continue;
// Track entries whose ref-count went to zero (IsLast=true) so that we
// can honor any subsequently encountered FROM entries that fall within
// their range.
if (TPR.Flags.IsLast) {
// For assumed-size arrays like map(delete: p[:]), the compiler provides
// no size information, so we need to get the actual allocated extent from
// the HDTT entry.
void *ReleasedHstPtrBegin =
reinterpret_cast<void *>(TPR.getEntry()->HstPtrBegin);
int64_t ReleasedSize =
TPR.getEntry()->HstPtrEnd - TPR.getEntry()->HstPtrBegin;
ODBG(ODT_Mapping) << "Tracking released entry: HstPtr="
<< ReleasedHstPtrBegin << ", Size=" << ReleasedSize
<< ", ForceDelete=" << ForceDelete;
StateInfo->ReleasedEntries[ReleasedHstPtrBegin] = ReleasedSize;
}
// Move data back to the host
const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM;
if (HasFrom && (HasAlways || TPR.Flags.IsLast) &&
!TPR.Flags.IsHostPointer && DataSize != 0) {
ODBG(ODT_Mapping) << "Moving " << DataSize
<< " bytes (tgt:" << TgtPtrBegin
<< ") -> (hst:" << HstPtrBegin << ")";
// Lambda to perform the actual FROM data retrieval from device to host
auto PerformFromRetrieval = [&](void *HstPtr, void *TgtPtr, int64_t Size,
HostDataToTargetTy *Entry) -> int {
// Check if this FROM transfer can be skipped.
//
// This is an optimization that may help in rare cases when we have
// multiple overlapping FROM entries. e.g.
//
// ... map(always, from: x) map(always, from: x)
// ... map(delete: x) map(from: x) map(from: x)
//
// If we think the overhead makes it not worh it, we can remove it.
if (auto TransferredEntry = StateInfo->wasTransferredFrom(HstPtr, Size)) {
void *TransferredPtr = TransferredEntry->first;
int64_t TransferredSize = TransferredEntry->second;
ODBG(ODT_Mapping) << "FROM entry HstPtr=" << HstPtr << " size=" << Size
<< " already transferred within [" << TransferredPtr
<< ", "
<< static_cast<void *>(
static_cast<char *>(TransferredPtr) +
TransferredSize)
<< ")";
return OFFLOAD_SUCCESS;
}
ODBG(ODT_Mapping) << "Moving " << Size << " bytes (tgt:" << TgtPtr
<< ") -> (hst:" << HstPtr << ")";
TIMESCOPE_WITH_DETAILS_AND_IDENT(
"DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc);
"DevToHost", "Size=" + std::to_string(Size) + "B", Loc);
// Wait for any previous transfer if an event is present.
if (void *Event = TPR.getEntry()->getEvent()) {
if (void *Event = Entry->getEvent()) {
if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
REPORT() << "Failed to wait for event " << Event << ".";
return OFFLOAD_FAIL;
}
}
Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo,
TPR.getEntry());
int Ret = Device.retrieveData(HstPtr, TgtPtr, Size, AsyncInfo, Entry);
if (Ret != OFFLOAD_SUCCESS) {
REPORT() << "Copying data from device failed.";
return OFFLOAD_FAIL;
@@ -1193,10 +1243,128 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// copy-back was issued but before it completed. Since the reuse might
// also copy-back a value we would race.
if (TPR.Flags.IsLast) {
if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
OFFLOAD_SUCCESS)
if (Entry->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
}
// Track this transfer to avoid duplicate transfers later on.
StateInfo->addTransferredFromEntry(HstPtr, Size);
return OFFLOAD_SUCCESS;
};
// Lambda to check if this pointer was previously released.
//
// This is needed to handle cases like the following:
// p1 = p2 = &x;
// ... map(delete: p1[:]) map(from: p2[0:1])
// The ref-count becomes zero before encountering the FROM entry, but we
// still need to do a transfer, if it went from non-zero to zero.
//
// OpenMP 6.0, sec. 7.9.6 "map Clause", p. 284 L24-26:
// If the reference count of the corresponding list item is one or if
// the always-modifier or delete-modifier is specified, and if the map
// type is from, the original list item is updated as if the list item
// appeared in a from clause on a target_update directive.
auto WasPreviouslyReleased = [&]() -> bool {
auto ReleasedEntry = StateInfo->wasPreviouslyReleased(HstPtrBegin);
if (!ReleasedEntry)
return false;
void *ReleasedPtr = ReleasedEntry->first;
int64_t ReleasedSize = ReleasedEntry->second;
ODBG(ODT_Mapping) << "Pointer HstPtr=" << HstPtrBegin
<< " falls within a range previously released ["
<< ReleasedPtr << ", "
<< static_cast<void *>(
static_cast<char *>(ReleasedPtr) + ReleasedSize)
<< ") with size=" << ReleasedSize;
return true;
};
bool IsMapFromOnNonHostNonZeroData =
HasFrom && !TPR.Flags.IsHostPointer && DataSize != 0;
auto IsLastOrHasAlwaysOrWasReleased = [&]() {
return TPR.Flags.IsLast || HasAlways || WasPreviouslyReleased();
};
if (IsMapFromOnNonHostNonZeroData && IsLastOrHasAlwaysOrWasReleased()) {
Ret = PerformFromRetrieval(HstPtrBegin, TgtPtrBegin, DataSize,
TPR.getEntry());
if (Ret != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
} else if (IsMapFromOnNonHostNonZeroData) {
// We can have cases like the following:
// p1 = p2 = &x;
// ... map(storage: p1[:]) map(from: p2[1:1])
//
// where it's possible that when the FROM entry is processed, the
// ref count is not zero, so no data transfer happens for it. But
// the ref-count can go down to zero once all maps have been processed
// for the current construct, in which case a transfer should happen.
//
// So, we keep track of any skipped FROM data-transfers, in case
// the ref-count goes down to zero later on.
//
// This cannot be handled in the compiler for all cases because the
// list-items may look very different, as shown in the example above,
// which is allowed with OpenMP 6.0:
//
// OpenMP 6.0, sec. 7.9.6 "map Clause", p. 286 L18-21:
// Two list items of the map clauses on the same construct must not share
// original storage unless one of the following is true: they are the same
// list item, one is the containing structure of the other, at least one
// is an assumed-size array, or at least one is implicitly mapped due to
// the list item also appearing in a use_device_addr clause.
StateInfo->addSkippedFromEntry(HstPtrBegin, DataSize);
ODBG(ODT_Mapping) << "Skipping FROM map transfer for HstPtr="
<< HstPtrBegin << " size=" << DataSize
<< " (IsLast=" << TPR.Flags.IsLast << ", TotalRefCount="
<< TPR.getEntry()->getTotalRefCount() << ")";
}
// If the ref-count went to zero (IsLast=true), check if any previously
// skipped FROM entries fall within this released entry's range.
if (TPR.Flags.IsLast && !StateInfo->SkippedFromEntries.empty()) {
uintptr_t ReleasedBeginPtrInt = TPR.getEntry()->HstPtrBegin;
uintptr_t ReleasedEndPtrInt = TPR.getEntry()->HstPtrEnd;
SmallVector<void *, 32> ToRemove;
for (auto &SkippedFromEntry : StateInfo->SkippedFromEntries) {
void *FromBeginPtr = SkippedFromEntry.first;
int64_t FromDataSize = SkippedFromEntry.second;
uintptr_t FromBeginPtrInt = reinterpret_cast<uintptr_t>(FromBeginPtr);
// Check if this skipped FROM entry's starting pointer falls within this
// released entry
if (FromBeginPtrInt >= ReleasedBeginPtrInt &&
FromBeginPtrInt < ReleasedEndPtrInt) {
ODBG(ODT_Mapping)
<< "Found skipped FROM entry: HstPtr=" << FromBeginPtr
<< " size=" << FromDataSize << " within region being released ["
<< reinterpret_cast<void *>(ReleasedBeginPtrInt) << ", "
<< reinterpret_cast<void *>(ReleasedEndPtrInt) << ")";
// Calculate offset within the target pointer
int64_t Offset = FromBeginPtrInt - ReleasedBeginPtrInt;
void *FromTgtBeginPtr =
static_cast<void *>(static_cast<char *>(TgtPtrBegin) + Offset);
// Perform the retrieval for this skipped entry
int Ret = PerformFromRetrieval(
reinterpret_cast<void *>(FromBeginPtrInt), FromTgtBeginPtr,
FromDataSize, TPR.getEntry());
if (Ret != OFFLOAD_SUCCESS)
return OFFLOAD_FAIL;
ToRemove.push_back(FromBeginPtr);
}
}
// Remove processed entries
for (void *Ptr : ToRemove)
StateInfo->SkippedFromEntries.erase(Ptr);
}
// Add pointer to the buffer for post-synchronize processing.
@@ -1377,7 +1545,7 @@ int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo, bool FromMapper) {
StateInfoTy *StateInfo, bool FromMapper) {
// process each input.
for (int32_t I = 0; I < ArgNum; ++I) {
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -1872,21 +2040,21 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
if (!DeviceOrErr)
FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
// Create AttachInfo for tracking any ATTACH entries, or new-allocations
// Create StateInfo for tracking any ATTACH entries, new allocations,
// when handling the "begin" mapping for a target constructs.
AttachInfoTy AttachInfo;
StateInfoTy StateInfo;
int Ret = targetDataBegin(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
&AttachInfo, false /*FromMapper=*/);
&StateInfo, false /*FromMapper=*/);
if (Ret != OFFLOAD_SUCCESS) {
REPORT() << "Call to targetDataBegin failed, abort target.";
return OFFLOAD_FAIL;
}
// Process collected ATTACH entries
if (!AttachInfo.AttachEntries.empty()) {
Ret = processAttachEntries(*DeviceOrErr, AttachInfo, AsyncInfo);
if (!StateInfo.AttachEntries.empty()) {
Ret = processAttachEntries(*DeviceOrErr, StateInfo, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT() << "Failed to process ATTACH entries.";
return OFFLOAD_FAIL;
@@ -2053,9 +2221,14 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr,
if (!DeviceOrErr)
FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
// Create StateInfo for tracking map(from)s for which ref-count is non-zero
// when the entry is encountered.
StateInfoTy StateInfo;
// Move data from device.
int Ret = targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo);
int Ret =
targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo, &StateInfo);
if (Ret != OFFLOAD_SUCCESS) {
REPORT() << "Call to targetDataEnd failed, abort target.";
return OFFLOAD_FAIL;

View File

@@ -0,0 +1,48 @@
// RUN: %libomptarget-compile-generic
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG
// REQUIRES: libomptarget-debug
// XFAIL: intelgpu
// Since the allocation of the pointee happens on the "target" construct (1),
// the "to" transfer requested as part of the mapper (2) should also happen.
//
// Similarly, the "from" transfer should also happen at the end of the target
// construct, even if the ref-count of the pointee x has not gone down to 0
// when "from" is encountered.
#include <stdio.h>
typedef struct {
int *p;
int *q;
} S;
#pragma omp declare mapper(my_mapper : S s) map(alloc : s.p, s.p[0 : 10]) \
map(from : s.p[0 : 10]) map(to : s.p[0 : 10]) \
map(alloc : s.p[0 : 10]) // (2)
S s1;
int main() {
int x[10];
x[1] = 111;
s1.q = s1.p = &x[0];
// clang-format off
// DEBUG: omptarget --> HstPtrBegin 0x[[#%x,HOST_ADDRX:]] was newly allocated for the current region
// DEBUG: omptarget --> Moving [[#%u,SIZEX:]] bytes (hst:0x{{0*}}[[#HOST_ADDRX]]) -> (tgt:0x{{.*}})
// clang-format on
#pragma omp target map(alloc : s1.p[0 : 10]) \
map(mapper(my_mapper), tofrom : s1) // (1)
{
printf("In tgt: %d\n", s1.p[1]); // CHECK-DAG: In tgt: 111
s1.p[1] = s1.p[1] + 111;
}
// clang-format off
// DEBUG: omptarget --> Found skipped FROM entry: HstPtr=0x{{0*}}[[#HOST_ADDRX]] size=[[#SIZEX]] within region being released
// DEBUG: omptarget --> Moving [[#SIZEX]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDRX]])
// clang-format on
printf("After tgt: %d\n", s1.p[1]); // CHECK-DAG: After tgt: 222
}

View File

@@ -0,0 +1,49 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=60
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG
// REQUIRES: libomptarget-debug
// The "present" check should pass on the "target" construct (2),
// and there should be no "to" transfer, because the pointee "x" is already
// present (because of (1)).
// However, there should be a "from" transfer at the end of (2) because of the
// "delete" on the mapper.
// FIXME: This currently fails, but should start passing once ATTACH-style maps
// are enabled for mappers (#166874).
// UNSUPPORTED: true
#include <stdio.h>
typedef struct {
int *p;
int *q;
} S;
#pragma omp declare mapper(my_mapper : S s) map(alloc : s.p) \
map(alloc, present : s.p[0 : 10]) map(delete : s.q[ : ]) \
map(from : s.p[0 : 10]) map(to : s.p[0 : 10]) map(alloc : s.p[0 : 10])
S s1;
int main() {
int x[10];
x[1] = 111;
s1.q = s1.p = &x[0];
#pragma omp target data map(alloc : x) // (1)
{
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes (hst:0x{{.*}}) -> (tgt:0x{{.*}})
#pragma omp target map(mapper(my_mapper), tofrom : s1) // (2)
{
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
printf("In tgt: %d\n", s1.p[1]); // CHECK-NOT: In tgt: 111
s1.p[1] = 222;
}
printf("After tgt: %d\n", s1.p[1]); // CHECK: After tgt: 222
}
// clang-format off
// DEBUG: omptarget --> Found skipped FROM entry: HstPtr=0x[[#%x,HOST_ADDR:]] size=[[#%u,SIZE:]] within region being released
// DEBUG: omptarget --> Moving [[#SIZE]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
// clang-format on
}

View File

@@ -0,0 +1,26 @@
// RUN: %libomptarget-compile-generic
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK
// REQUIRES: libomptarget-debug
// XFAIL: intelgpu
#include <stdio.h>
// Even if the "alloc" and "from" are encountered before the "to",
// there should be a data-transfer from host to device, as the
// ref-count goes from 0 to 1 at the entry of the target region.
int main() {
int x = 111;
// clang-format off
// DEBUG: omptarget --> HstPtrBegin 0x[[#%x,HOST_ADDR:]] was newly allocated for the current region
// DEBUG: omptarget --> Moving {{.*}} bytes (hst:0x{{0*}}[[#HOST_ADDR]]) -> (tgt:0x{{.*}})
// clang-format on
#pragma omp target map(alloc : x) map(from : x) map(to : x) map(alloc : x)
{
printf("%d\n", x); // CHECK: 111
x = x + 111;
}
printf("%d\n", x); // CHECK: 222
}

View File

@@ -0,0 +1,25 @@
// RUN: %libomptarget-compile-generic
// RUN: %libomptarget-run-fail-generic 2>&1 \
// RUN: | %fcheck-generic
#include <stdio.h>
int main() {
// CHECK: addr=0x[[#%x,HOST_ADDR:]], size=[[#%u,SIZE:]]
int x = 111;
fprintf(stderr, "addr=%p, size=%ld\n", &x, sizeof(x));
// clang-format off
// CHECK: omptarget message: device mapping required by 'present' map type modifier does not exist for host address 0x{{0*}}[[#HOST_ADDR]] ([[#SIZE]] bytes)
// CHECK: omptarget error: Pointer 0x{{0*}}[[#HOST_ADDR]] was not present on the device upon entry to the region.
// CHECK: omptarget error: Call to targetDataBegin failed, abort target.
// CHECK: omptarget error: Failed to process data before launching the kernel.
// CHECK: omptarget fatal error 1: failure of target construct while offloading is mandatory
// clang-format on
#pragma omp target map(alloc : x) map(present, alloc : x) map(tofrom : x)
{
printf("%d\n", x);
}
return 0;
}

View File

@@ -0,0 +1,15 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>
int main() {
int x = 111;
#pragma omp target map(alloc : x) map(tofrom : x) map(alloc : x)
{
printf("In tgt: %d\n", x); // CHECK-DAG: In tgt: 111
x = x + 111;
}
printf("After tgt: %d\n", x); // CHECK-DAG: After tgt: 222
}

View File

@@ -0,0 +1,15 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>
int main() {
int x = 111;
#pragma omp target data map(alloc : x) map(from : x) map(alloc : x)
{
#pragma omp target map(present, alloc : x)
x = 222;
}
printf("After tgt data: %d\n", x); // CHECK: After tgt data: 222
}

View File

@@ -0,0 +1,18 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>
int main() {
int x = 111;
#pragma omp target data map(alloc : x) map(to : x) map(from : x) map(alloc : x)
{
#pragma omp target map(present, alloc : x)
{
printf("%d\n", x); // CHECK: 111
x = x + 111;
}
}
printf("%d\n", x); // CHECK: 222
}

View File

@@ -0,0 +1,18 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>
int main() {
int x = 111;
#pragma omp target data map(alloc : x) map(tofrom : x) map(alloc : x)
{
#pragma omp target map(present, alloc : x)
{
printf("%d\n", x); // CHECK: 111
x = x + 111;
}
}
printf("%d\n", x); // CHECK: 222
}

View File

@@ -0,0 +1,28 @@
// RUN: %libomptarget-compile-generic
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK
// REQUIRES: libomptarget-debug
// XFAIL: intelgpu
// There should only be one "from" data-transfer, despite the two duplicate
// maps.
#include <stdio.h>
int main() {
int x = 111;
#pragma omp target data map(alloc : x)
{
#pragma omp target enter data map(alloc : x) map(to : x)
#pragma omp target map(present, alloc : x)
{
printf("In tgt: %d\n", x); // CHECK-NOT: In tgt: 111
x = 222;
}
#pragma omp target exit data map(always, from : x) map(always, from : x)
// DEBUG: omptarget --> Moving {{.*}} bytes (tgt:0x{{.*}}) -> (hst:0x{{.*}})
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes
}
printf("After tgt exit data: %d\n", x); // CHECK: After tgt exit data: 222
}

View File

@@ -0,0 +1,20 @@
// RUN: %libomptarget-compile-run-and-check-generic
// XFAIL: intelgpu
#include <stdio.h>
int main() {
int x = 111;
#pragma omp target data map(alloc : x)
{
#pragma omp target enter data map(alloc : x) map(to : x)
#pragma omp target map(present, alloc : x)
{
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
printf("In tgt: %d\n", x); // CHECK-NOT: In tgt: 111
x = 222;
}
#pragma omp target exit data map(delete : x) map(from : x) map(delete : x)
printf("After tgt exit data: %d\n", x); // CHECK: After tgt exit data: 222
}
}

View File

@@ -0,0 +1,43 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=60
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG
// REQUIRES: libomptarget-debug
// XFAIL: intelgpu
// The from on target_exit_data should result in a data-transfer of 4 bytes,
// even if when "from" is honored, the ref-count hasn't gone down to 0.
// It will eventually go down to 0 as part of the same exit_data due to the
// "delete" on it.
// This is a case that cannot be handled at compile time because the list-items
// are not related.
#include <stdio.h>
int main() {
int x[10];
int *p1x, *p2x;
p1x = p2x = &x[0];
#pragma omp target data map(alloc : x)
{
#pragma omp target enter data map(alloc : x) map(to : x)
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes (hst:0x{{.*}}) -> (tgt:0x{{.*}})
#pragma omp target map(present, alloc : x)
{
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
printf("In tgt: %d\n", x[1]); // CHECK-NOT: In tgt: 111
x[1] = 222;
}
#pragma omp target exit data map(delete : p1x[ : ]) map(from : p2x[1])
// clang-format off
// DEBUG: omptarget --> Found skipped FROM entry: HstPtr=0x[[#%x,HOST_ADDR:]] size=[[#%u,SIZE:]] within region being released
// DEBUG: omptarget --> Moving [[#SIZE]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
// clang-format on
// CHECK: After tgt exit data: 222
printf("After tgt exit data: %d\n", x[1]);
}
}

View File

@@ -0,0 +1,43 @@
// RUN: %libomptarget-compile-generic -fopenmp-version=60
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG
// REQUIRES: libomptarget-debug
// XFAIL: intelgpu
// The from on target_exit_data should result in a data-transfer of 4 bytes,
// even if when "delete" is honored first, and by the time "from" is
// encountered, the ref-count had already been 0 (i.e. it's not transitioning
// from non-zero to zero).
// This is a case that cannot be handled at compile time because the list-items
// are not related.
#include <stdio.h>
int main() {
int x[10];
int *p1x, *p2x;
p1x = p2x = &x[1];
x[1] = 111;
#pragma omp target data map(alloc : x)
{
#pragma omp target enter data map(alloc : x) map(to : x)
// DEBUG-NOT: omptarget --> Moving {{.*}} bytes (hst:0x{{.*}}) -> (tgt:0x{{.*}})
#pragma omp target map(present, alloc : x)
{
// NOTE: It's ok for this to be 111 under "unified_shared_memory"
printf("In tgt: %d\n", x[1]); // CHECK-NOT: In tgt: 111
x[1] = 222;
}
#pragma omp target exit data map(from : p2x[0]) map(delete : p1x[ : ])
// clang-format off
// DEBUG: omptarget --> Pointer HstPtr=0x[[#%x,HOST_ADDR:]] falls within a range previously released
// DEBUG: omptarget --> Moving {{.*}} bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
// clang-format on
// CHECK: After tgt exit data: 222
printf("After tgt exit data: %d\n", x[1]);
}
}

View File

@@ -0,0 +1,50 @@
// RUN: %libomptarget-compile-generic
// RUN: %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=CHECK
// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic -check-prefix=DEBUG
// REQUIRES: libomptarget-debug
// XFAIL: intelgpu
// The test ensures that the FROM transfer for the full "s1" is performed, and
// not just the FROM done via the mapper of s1.s2.
#include <stdio.h>
typedef struct {
int a;
int b;
} S2;
#pragma omp declare mapper(my_mapper : S2 s2) map(tofrom : s2.a)
typedef struct {
S2 s2;
int c;
int d;
} S1;
S1 s1;
int main() {
#pragma omp target enter data map(alloc : s1)
#pragma omp target map(present, alloc : s1)
{
s1.s2.a = 111;
s1.s2.b = 222;
s1.c = 333;
s1.d = 444;
}
// clang-format off
// DEBUG: omptarget --> Tracking released entry: HstPtr=0x[[#%x,HOST_ADDR:]], Size=[[#%u,SIZE:]], ForceDelete=0
// DEBUG: omptarget --> Moving {{.*}} bytes (tgt:0x{{.*}}) -> (hst:0x{{.*}})
// DEBUG: omptarget --> Pointer HstPtr=0x{{0*}}[[#HOST_ADDR]] falls within a range previously released
// DEBUG: omptarget --> Moving [[#SIZE]] bytes (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]])
// clang-format on
#pragma omp target exit data map(from : s1) map(mapper(my_mapper), from : s1.s2)
// CHECK: 111 222 333 444
printf("%d %d %d %d\n", s1.s2.a, s1.s2.b, s1.c, s1.d);
}