[offload] Remove LIBOMPTARGET_SHARED_MEMORY_SIZE envar (#186231)
This commit removes the `LIBOMPTARGET_SHARED_MEMORY_SIZE` envar and outputs a runtime warning if it is defined. Access to dynamic shared memory should be obtained through the `dyn_groupprivate` clause (OpenMP 6.1) or the launch arguments in liboffload kernel launch.
This commit is contained in:
committed by
GitHub
parent
e51e9afe68
commit
ac71b185c2
@@ -26,11 +26,22 @@ static uint32_t RefCount = 0;
|
||||
std::atomic<bool> RTLAlive{false};
|
||||
std::atomic<int> RTLOngoingSyncs{0};
|
||||
|
||||
/// Check deleted and deprecated features, such as environment variables.
|
||||
static void checkRuntimeEnvironment() {
|
||||
const char *ShmemEnvarName = "LIBOMPTARGET_SHARED_MEMORY_SIZE";
|
||||
if (std::getenv(ShmemEnvarName))
|
||||
MESSAGE("Warning: %s is no longer valid. Please use OpenMP clause "
|
||||
"'dyn_groupprivate' instead.\n",
|
||||
ShmemEnvarName);
|
||||
}
|
||||
|
||||
void initRuntime() {
|
||||
std::scoped_lock<decltype(PluginMtx)> Lock(PluginMtx);
|
||||
Profiler::get();
|
||||
TIMESCOPE();
|
||||
|
||||
checkRuntimeEnvironment();
|
||||
|
||||
if (PM == nullptr)
|
||||
PM = new PluginManager();
|
||||
|
||||
|
||||
@@ -211,7 +211,7 @@ DeviceTy::loadBinary(__tgt_device_image *Img) {
|
||||
DeviceEnvironment.NumDevices = RTL->getNumDevices();
|
||||
// TODO: The device ID used here is not the real device ID used by OpenMP.
|
||||
DeviceEnvironment.DeviceNum = RTLDeviceID;
|
||||
DeviceEnvironment.DynamicMemSize = GenericDevice.getDynamicMemorySize();
|
||||
DeviceEnvironment.DynamicMemSize = 0;
|
||||
DeviceEnvironment.ClockFrequency = GenericDevice.getClockFrequency();
|
||||
DeviceEnvironment.IndirectCallTable =
|
||||
reinterpret_cast<uintptr_t>(CallTablePairOrErr->first);
|
||||
|
||||
@@ -3822,10 +3822,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
|
||||
KernelArgs.DynCGroupMem);
|
||||
}
|
||||
|
||||
// Increase to the requested dynamic memory size for the device if needed.
|
||||
DynBlockMemSize =
|
||||
std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize());
|
||||
|
||||
// HSA requires the group segment size to include both static and dynamic.
|
||||
uint32_t TotalBlockMemSize = getStaticBlockMemSize() + DynBlockMemSize;
|
||||
|
||||
|
||||
@@ -1023,7 +1023,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
|
||||
return GridValues.GV_Default_Num_Teams;
|
||||
}
|
||||
uint32_t getDebugKind() const { return OMPX_DebugKind; }
|
||||
uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
|
||||
virtual uint64_t getClockFrequency() const { return CLOCKS_PER_SEC; }
|
||||
|
||||
/// Get target compute unit kind (e.g., sm_80, or gfx908).
|
||||
@@ -1196,7 +1195,6 @@ private:
|
||||
|
||||
/// Environment variables defined by the LLVM OpenMP implementation.
|
||||
Int32Envar OMPX_DebugKind;
|
||||
UInt32Envar OMPX_SharedMemorySize;
|
||||
UInt64Envar OMPX_TargetStackSize;
|
||||
UInt64Envar OMPX_TargetHeapSize;
|
||||
|
||||
|
||||
@@ -776,7 +776,6 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
|
||||
OMP_NumTeams("OMP_NUM_TEAMS"),
|
||||
OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
|
||||
OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
|
||||
OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
|
||||
// Do not initialize the following two envars since they depend on the
|
||||
// device initialization. These cannot be consulted until the device is
|
||||
// initialized correctly. We initialize them in GenericDeviceTy::init().
|
||||
|
||||
@@ -1491,10 +1491,6 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
|
||||
if (GenericDevice.getRPCServer())
|
||||
GenericDevice.Plugin.getRPCServer().Thread->notify();
|
||||
|
||||
// Increase to the requested dynamic memory size for the device if needed.
|
||||
DynBlockMemSize =
|
||||
std::max(DynBlockMemSize, GenericDevice.getDynamicMemorySize());
|
||||
|
||||
// In case we require more memory than the current limit.
|
||||
if (DynBlockMemSize >= MaxDynBlockMemSize) {
|
||||
CUresult AttrResult = cuFuncSetAttribute(
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
// RUN: %libomptarget-compile-generic
|
||||
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
|
||||
// RUN: %libomptarget-run-generic | %fcheck-generic
|
||||
|
||||
// RUN: %libomptarget-compileopt-generic
|
||||
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
|
||||
// RUN: %libomptarget-run-generic | %fcheck-generic
|
||||
|
||||
// REQUIRES: gpu
|
||||
// XFAIL: intelgpu
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x;
|
||||
#pragma omp target parallel map(from : x)
|
||||
{
|
||||
int *buf = llvm_omp_target_dynamic_shared_alloc() + 252;
|
||||
#pragma omp barrier
|
||||
if (omp_get_thread_num() == 0)
|
||||
*buf = 1;
|
||||
#pragma omp barrier
|
||||
if (omp_get_thread_num() == 1)
|
||||
x = *buf;
|
||||
}
|
||||
|
||||
// CHECK: PASS
|
||||
if (x == 1 && llvm_omp_target_dynamic_shared_alloc() == NULL)
|
||||
printf("PASS\n");
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -O2 -mllvm \
|
||||
// RUN: -openmp-opt-inline-device
|
||||
// RUN: env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 \
|
||||
// RUN: %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
|
||||
// REQUIRES: amdgcn-amd-amdhsa
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
int x;
|
||||
#pragma omp target parallel map(from : x)
|
||||
{
|
||||
int *buf = llvm_omp_target_dynamic_shared_alloc() + 252;
|
||||
#pragma omp barrier
|
||||
if (omp_get_thread_num() == 0)
|
||||
*buf = 1;
|
||||
#pragma omp barrier
|
||||
if (omp_get_thread_num() == 1)
|
||||
x = *buf;
|
||||
}
|
||||
|
||||
// CHECK: PASS
|
||||
if (x == 1 && llvm_omp_target_dynamic_shared_alloc() == NULL)
|
||||
printf("PASS\n");
|
||||
}
|
||||
@@ -733,7 +733,6 @@ variables is defined below.
|
||||
* ``LIBOMPTARGET_INFO=<Num>``
|
||||
* ``LIBOMPTARGET_HEAP_SIZE=<Num>``
|
||||
* ``LIBOMPTARGET_STACK_SIZE=<Num>``
|
||||
* ``LIBOMPTARGET_SHARED_MEMORY_SIZE=<Num>``
|
||||
* ``LIBOMPTARGET_MAP_FORCE_ATOMIC=[TRUE/FALSE] (default TRUE)``
|
||||
* ``LIBOMPTARGET_TREAT_ATTACH_AUTO_AS_ALWAYS=[TRUE/FALSE] (default FALSE)``
|
||||
* ``LIBOMPTARGET_JIT_OPT_LEVEL={0,1,2,3} (default 3)``
|
||||
@@ -1059,14 +1058,6 @@ allocated using ``malloc`` and ``free`` for the CUDA plugin. This is necessary
|
||||
for some applications that allocate too much memory either through the user or
|
||||
globalization.
|
||||
|
||||
LIBOMPTARGET_SHARED_MEMORY_SIZE
|
||||
"""""""""""""""""""""""""""""""
|
||||
|
||||
This environment variable sets the amount of dynamic shared memory in bytes used
|
||||
by the kernel once it is launched. A pointer to the dynamic memory buffer can be
|
||||
accessed using the ``llvm_omp_target_dynamic_shared_alloc`` function. An example
|
||||
is shown in :ref:`libomptarget_dynamic_shared`.
|
||||
|
||||
.. toctree::
|
||||
:hidden:
|
||||
:maxdepth: 1
|
||||
@@ -1233,7 +1224,6 @@ Environment Variables
|
||||
|
||||
There are several environment variables to change the behavior of the plugins:
|
||||
|
||||
* ``LIBOMPTARGET_SHARED_MEMORY_SIZE``
|
||||
* ``LIBOMPTARGET_STACK_SIZE``
|
||||
* ``LIBOMPTARGET_HEAP_SIZE``
|
||||
* ``LIBOMPTARGET_NUM_INITIAL_STREAMS``
|
||||
@@ -1247,8 +1237,8 @@ There are several environment variables to change the behavior of the plugins:
|
||||
* ``LIBOMPTARGET_AMDGPU_NUM_INITIAL_HSA_SIGNALS``
|
||||
* ``LIBOMPTARGET_AMDGPU_STREAM_BUSYWAIT``
|
||||
|
||||
The environment variables ``LIBOMPTARGET_SHARED_MEMORY_SIZE``,
|
||||
``LIBOMPTARGET_STACK_SIZE`` and ``LIBOMPTARGET_HEAP_SIZE`` are described in
|
||||
The environment variables ``LIBOMPTARGET_STACK_SIZE`` and
|
||||
``LIBOMPTARGET_HEAP_SIZE`` are described in
|
||||
:ref:`libopenmptarget_environment_vars`.
|
||||
|
||||
LIBOMPTARGET_NUM_INITIAL_STREAMS
|
||||
@@ -1401,6 +1391,10 @@ LIBOMPTARGET_RPC_LATENCY
|
||||
""""""""""""""""""""""""
|
||||
This is the maximum amount of time the client will wait for a response from the server.
|
||||
|
||||
.. warning::
|
||||
The ``LIBOMPTARGET_SHARED_MEMORY_SIZE`` environment variable is not
|
||||
supported anymore. Please use the ``dyn_groupprivate`` clause instead, as
|
||||
shown in :ref:`libomptarget_dynamic_shared`.
|
||||
|
||||
.. _libomptarget_libc:
|
||||
|
||||
@@ -1463,35 +1457,21 @@ IR during compilation.
|
||||
Dynamic Shared Memory
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The target device runtime contains a pointer to the dynamic shared memory
|
||||
buffer. This pointer can be obtained using the
|
||||
The OpenMP implementation provides access to dynamic shared memory in ``target``
|
||||
regions through the ``dyn_groupprivate`` clause, introduced in OpenMP 6.1. This
|
||||
is the preferred method to obtain dynamic shared memory. Please refer to
|
||||
the OpenMP standard documentation for more information.
|
||||
|
||||
As an alternative, the target device runtime contains a pointer to the native
|
||||
dynamic shared memory buffer. This pointer can be obtained using the
|
||||
``llvm_omp_target_dynamic_shared_alloc`` extension. If this function is called
|
||||
from the host it will simply return a null pointer. In order to use this buffer
|
||||
the kernel must be launched with an adequate amount of dynamic shared memory
|
||||
allocated. This can be done using the ``LIBOMPTARGET_SHARED_MEMORY_SIZE``
|
||||
environment variable or the ``ompx_dyn_cgroup_mem(<N>)`` target directive
|
||||
clause. Examples for both are given below.
|
||||
allocated. This can be done using the ``ompx_dyn_cgroup_mem(<N>)`` target
|
||||
directive clause. An example is given below.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
void foo() {
|
||||
int x;
|
||||
#pragma omp target parallel map(from : x)
|
||||
{
|
||||
int *buf = llvm_omp_target_dynamic_shared_alloc();
|
||||
if (omp_get_thread_num() == 0)
|
||||
*buf = 1;
|
||||
#pragma omp barrier
|
||||
if (omp_get_thread_num() == 1)
|
||||
x = *buf;
|
||||
}
|
||||
assert(x == 1);
|
||||
}
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ clang++ -fopenmp --offload-arch=sm_80 -O3 shared.c
|
||||
$ env LIBOMPTARGET_SHARED_MEMORY_SIZE=256 ./shared
|
||||
Please notice that the ``LIBOMPTARGET_SHARED_MEMORY_SIZE`` environment variable
|
||||
is not supported anymore.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
@@ -1509,11 +1489,6 @@ clause. Examples for both are given below.
|
||||
assert(x == 1);
|
||||
}
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ clang++ -fopenmp --offload-arch=gfx90a -O3 shared.c
|
||||
$ env ./shared
|
||||
|
||||
.. _libomptarget_device_allocator:
|
||||
|
||||
Device Allocation
|
||||
|
||||
Reference in New Issue
Block a user