[flang-rt] Remove experimental OpenMP offloading support (#183653)

Summary: This, as far as I am aware, has mostly been superseded by the runtimes build that's built on top of libc. This build links 30% faster, supports more functionality, and uses 95% less disk space, so it seems to be the direction we want to go. CUDA support remains; this is not needed urgently.
2026-03-06 09:50:57 -06:00
parent 9e15c6cc33
commit 8e40387ce4
10 changed files with 25 additions and 140 deletions
--- a/flang-rt/CMakeLists.txt
+++ b/flang-rt/CMakeLists.txt
@@ -179,11 +179,10 @@ if (NOT FLANG_RT_ENABLE_STATIC AND NOT FLANG_RT_ENABLE_SHARED)
 endif ()


-set(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT "" CACHE STRING "Compile Flang-RT with GPU support (CUDA or OpenMP)")
+set(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT "" CACHE STRING "Compile Flang-RT with GPU support (CUDA)")
 set_property(CACHE FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT PROPERTY STRINGS
    ""
    CUDA
-    OpenMP
  )
 if (NOT FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT)
  # Support for GPUs disabled
@@ -191,30 +190,8 @@ elseif (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
  # Support for CUDA
  set(FLANG_RT_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")
  option(FLANG_RT_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS "Do not compile global variables' definitions when producing PTX library" OFF)
-elseif (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP")
-  # Support for OpenMP offloading
-  set(FLANG_RT_DEVICE_ARCHITECTURES "all" CACHE STRING
-      "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')"
-    )
-
-  if (FLANG_RT_DEVICE_ARCHITECTURES STREQUAL "all")
-    # TODO: support auto detection on the build system.
-    set(all_amdgpu_architectures
-      "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
-      "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
-      "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
-      "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
-      "gfx1152;gfx1153;gfx1170")
-    set(all_nvptx_architectures
-      "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
-      "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90")
-    set(all_gpu_architectures
-      "${all_amdgpu_architectures};${all_nvptx_architectures}")
-      set(FLANG_RT_DEVICE_ARCHITECTURES ${all_gpu_architectures})
-  endif()
-  list(REMOVE_DUPLICATES FLANG_RT_DEVICE_ARCHITECTURES)
 else ()
-  message(FATAL_ERROR "Invalid value '${FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT}' for FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT; must be empty, 'CUDA', or 'OpenMP'")
+  message(FATAL_ERROR "Invalid value '${FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT}' for FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT; must be empty or 'CUDA'")
 endif ()


--- a/flang-rt/README.md
+++ b/flang-rt/README.md
@@ -146,16 +146,12 @@ CMake itself provide.
   the compiler for `__float128` or 128-bit `long double` support.
   [More details](docs/Real16MathSupport.md).

- * `FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT` (values: `"CUDA"`,`"OpenMP"`, `""` default: `""`)
+ * `FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT` (values: `"CUDA"`, `""` default: `""`)

   When set to `CUDA`, builds Flang-RT with experimental support for GPU
   accelerators using CUDA. `CMAKE_CUDA_COMPILER` must be set if not
   automatically detected by CMake. `nvcc` as well as `clang` are supported.

-   When set to `OpenMP`, builds Flang-RT with experimental support for
-   GPU accelerators using OpenMP offloading. Only Clang is supported for
-   `CMAKE_C_COMPILER` and `CMAKE_CXX_COMPILER`.
-
 * `FLANG_RT_INCLUDE_CUF` (bool, default: `OFF`)

   Compiles the `libflang_rt.cuda_<CUDA-version>.a/.so` library. This is
@@ -181,13 +177,10 @@ additional configuration options become available.
   default.


-### Experimental OpenMP Offload Support

-With `-DFLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT=OpenMP`, the following
-additional configuration options become available.
+### GPU Offloading Support

- * `FLANG_RT_DEVICE_ARCHITECTURES` (default: `"all"`)
-
-   A list of device architectures that Flang-RT is going to support.
-   If `"all"` uses a pre-defined list of architectures. Same purpose as
-   `LIBOMPTARGET_DEVICE_ARCHITECTURES` from liboffload.
+Flang-RT can be built for GPU targets (AMDGPU, NVPTX) using the LLVM
+runtimes build infrastructure. The easiest way to configure a build for
+GPU offloading is via the CMake cache file at
+`offload/cmake/caches/FlangOffload.cmake`.
--- a/flang-rt/cmake/modules/AddFlangRTOffload.cmake
+++ b/flang-rt/cmake/modules/AddFlangRTOffload.cmake
@@ -71,45 +71,3 @@ macro(enable_cuda_compilation name files)
  endif()
 endmacro()

-macro(enable_omp_offload_compilation name files)
-  if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP")
-    # OpenMP offload build only works with Clang compiler currently.
-
-    if (FLANG_RT_ENABLE_SHARED)
-      message(FATAL_ERROR
-        "FLANG_RT_ENABLE_SHARED is not supported for OpenMP offload build of Flang-RT"
-        )
-    endif()
-
-    if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
-        "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
-
-      string(REPLACE ";" "," compile_for_architectures
-        "${FLANG_RT_DEVICE_ARCHITECTURES}"
-        )
-
-      set(OMP_COMPILE_OPTIONS
-        -fopenmp
-        -fvisibility=hidden
-        -fopenmp-cuda-mode
-        --offload-arch=${compile_for_architectures}
-        # Force LTO for the device part.
-        -foffload-lto
-        )
-      set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
-        "${OMP_COMPILE_OPTIONS}"
-        )
-      target_link_options(${name}.static PUBLIC ${OMP_COMPILE_OPTIONS})
-
-      # Enable "declare target" in the source code.
-      set_source_files_properties(${files}
-        PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
-        )
-    else()
-      message(FATAL_ERROR
-        "Flang-rt build with OpenMP offload is not supported for these compilers:\n"
-        "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
-        "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
-    endif()
-  endif()
-endmacro()
--- a/flang-rt/include/flang-rt/runtime/environment.h
+++ b/flang-rt/include/flang-rt/runtime/environment.h
@@ -40,11 +40,7 @@ struct ExecutionEnvironment {
  typedef void (*ConfigEnvCallbackPtr)(
      int, const char *[], const char *[], const EnvironmentDefaultList *);

-#if !defined(_OPENMP)
-  // FIXME: https://github.com/llvm/llvm-project/issues/84942
-  constexpr
-#endif
-      ExecutionEnvironment(){};
+  constexpr ExecutionEnvironment() {};
  void Configure(int argc, const char *argv[], const char *envp[],
      const EnvironmentDefaultList *envDefaults);

--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -200,7 +200,6 @@ if (NOT WIN32)
  )

  enable_cuda_compilation(flang_rt.runtime "${supported_sources}")
-  enable_omp_offload_compilation(flang_rt.runtime "${supported_sources}")

  # Select a default runtime, which is used for unit and regression tests.
  get_target_property(default_target flang_rt.runtime.default ALIASED_TARGET)
@@ -234,7 +233,6 @@ else()
      )

    enable_cuda_compilation(${name} "${supported_sources}")
-    enable_omp_offload_compilation(${name} "${supported_sources}")
    add_dependencies(flang_rt.runtime ${name})
  endfunction ()

--- a/flang-rt/lib/runtime/external-unit.cpp
+++ b/flang-rt/lib/runtime/external-unit.cpp
@@ -16,9 +16,6 @@
 #include "flang-rt/runtime/lock.h"
 #include "flang-rt/runtime/tools.h"

-// NOTE: the header files above may define OpenMP declare target
-// variables, so they have to be included unconditionally
-// so that the offload entries are consistent between host and device.
 #if !defined(RT_USE_PSEUDO_FILE_UNIT)

 #include <cstdio>
--- a/flang-rt/lib/runtime/pseudo-unit.cpp
+++ b/flang-rt/lib/runtime/pseudo-unit.cpp
@@ -15,9 +15,6 @@
 #include "flang-rt/runtime/io-error.h"
 #include "flang-rt/runtime/tools.h"

-// NOTE: the header files above may define OpenMP declare target
-// variables, so they have to be included unconditionally
-// so that the offload entries are consistent between host and device.
 #if defined(RT_USE_PSEUDO_FILE_UNIT)
 #include <cstdio>

--- a/flang-rt/lib/runtime/work-queue.cpp
+++ b/flang-rt/lib/runtime/work-queue.cpp
@@ -14,8 +14,7 @@

 namespace Fortran::runtime {

-#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
-// FLANG_RT_DEBUG code is disabled when false.
+#if !defined(RT_DEVICE_COMPILATION)
 static constexpr bool enableDebugOutput{false};
 #endif

@@ -79,7 +78,7 @@ RT_API_ATTRS Ticket &WorkQueue::StartTicket() {
    last_ = newTicket;
  }
  newTicket->ticket.begun = false;
-#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
+#if !defined(RT_DEVICE_COMPILATION)
  if (enableDebugOutput &&
      (executionEnvironment.internalDebugging &
          ExecutionEnvironment::WorkQueue)) {
@@ -93,7 +92,7 @@ RT_API_ATTRS int WorkQueue::Run() {
  while (last_) {
    TicketList *at{last_};
    insertAfter_ = last_;
-#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
+#if !defined(RT_DEVICE_COMPILATION)
    if (enableDebugOutput &&
        (executionEnvironment.internalDebugging &
            ExecutionEnvironment::WorkQueue)) {
@@ -102,7 +101,7 @@ RT_API_ATTRS int WorkQueue::Run() {
    }
 #endif
    int stat{at->ticket.Continue(*this)};
-#if !defined(RT_DEVICE_COMPILATION) && !defined(OMP_OFFLOAD_BUILD)
+#if !defined(RT_DEVICE_COMPILATION)
    if (enableDebugOutput &&
        (executionEnvironment.internalDebugging &
            ExecutionEnvironment::WorkQueue)) {
--- a/flang-rt/unittests/CMakeLists.txt
+++ b/flang-rt/unittests/CMakeLists.txt
@@ -42,18 +42,6 @@ function(add_flangrt_unittest_offload_properties target)
      PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON
      )
  endif()
-  # Enable OpenMP offload during linking. We may need to replace
-  # LINK_OPTIONS with COMPILE_OPTIONS when there are OpenMP offload
-  # unittests.
-  #
-  # FIXME: replace 'native' in --offload-arch option with the list
-  #        of targets that Fortran Runtime was built for.
-  if (FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "OpenMP")
-    set_target_properties(${target}
-      PROPERTIES LINK_OPTIONS
-      "-fopenmp;--offload-arch=native"
-      )
-  endif()
 endfunction()

 # flang-rt on Windows requires compiler-rt for some symbols. For binaries built
--- a/flang/docs/GettingStarted.md
+++ b/flang/docs/GettingStarted.md
@@ -204,9 +204,18 @@ ninja install


 ### Building Flang-RT for accelerators
-Flang runtime can be built for accelerators in experimental mode, i.e.
-complete enabling is WIP.  CUDA and OpenMP target offload builds
-are currently supported.
+Flang runtime can be built for GPU targets (AMDGPU, NVPTX) using the LLVM
+runtimes build infrastructure. The recommended way to configure a build for GPU
+offloading is via the CMake cache file provided by `offload`.
+
+```bash
+cmake ../llvm -G Ninja                              \
+    -C ../offload/cmake/caches/FlangOffload.cmake   \
+    -DCMAKE_BUILD_TYPE=Release                      \
+    -DCMAKE_INSTALL_PREFIX=<PATH>
+```
+
+An experimental CUDA build of the runtime is also available.

 #### Building out-of-tree

@@ -299,33 +308,6 @@ number sufficiently low for all build jobs to fit into the available RAM. Using
 the number of harware threads (`nprocs`) is likely too much for most
 commodity machines.

-##### OpenMP target offload build
-Only Clang compiler is currently supported.
-
-```bash
-cd llvm-project
-rm -rf build_flang_runtime
-mkdir build_flang_runtime
-cd build_flang_runtime
-
-cmake \
-  -DLLVM_ENABLE_RUNTIMES=flang-rt \
-  -DFLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT="OpenMP" \
-  -DCMAKE_C_COMPILER=clang \
-  -DCMAKE_CXX_COMPILER=clang++ \
-  -DFLANG_RT_DEVICE_ARCHITECTURES=all \
-  ../runtimes/
-
-make flang-rt
-```
-
-The result of the build is a "device-only" library, i.e. the host
-part of the library is just a container for the device code.
-The resulting library may be linked to user programs using
-Clang-like device linking pipeline.
-
-The same set of CMake variables works for Flang in-tree build.
-
 ### Build options

 One may provide optional CMake variables to customize the build. Available options: