libclc: Remove r600 support (#181976)

This commit is contained in:
Matt Arsenault
2026-02-18 10:43:29 +01:00
committed by GitHub
parent c5a0742d17
commit 3d8fffec7b
35 changed files with 5 additions and 917 deletions

View File

@@ -24,7 +24,6 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
opencl/lib/clspv/SOURCES; opencl/lib/clspv/SOURCES;
opencl/lib/generic/SOURCES; opencl/lib/generic/SOURCES;
opencl/lib/ptx-nvidiacl/SOURCES; opencl/lib/ptx-nvidiacl/SOURCES;
opencl/lib/r600/SOURCES;
opencl/lib/spirv/SOURCES; opencl/lib/spirv/SOURCES;
# CLC internal libraries # CLC internal libraries
clc/lib/generic/SOURCES; clc/lib/generic/SOURCES;
@@ -32,7 +31,6 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
clc/lib/amdgpu/SOURCES; clc/lib/amdgpu/SOURCES;
clc/lib/clspv/SOURCES; clc/lib/clspv/SOURCES;
clc/lib/ptx-nvidiacl/SOURCES; clc/lib/ptx-nvidiacl/SOURCES;
clc/lib/r600/SOURCES;
clc/lib/spirv/SOURCES; clc/lib/spirv/SOURCES;
) )
@@ -151,7 +149,6 @@ set( LIBCLC_TARGETS_ALL
amdgcn-amd-amdhsa-llvm amdgcn-amd-amdhsa-llvm
clspv-- clspv--
clspv64-- clspv64--
r600--
nvptx64-- nvptx64--
nvptx64--nvidiacl nvptx64--nvidiacl
nvptx64-nvidia-cuda nvptx64-nvidia-cuda
@@ -198,7 +195,6 @@ list( SORT LIBCLC_TARGETS_TO_BUILD )
include_directories( ${LLVM_INCLUDE_DIRS} ) include_directories( ${LLVM_INCLUDE_DIRS} )
# Setup arch devices # Setup arch devices
set( r600--_devices cedar cypress barts cayman )
set( amdgcn--_devices none ) set( amdgcn--_devices none )
set( amdgcn-mesa-mesa3d_devices none ) set( amdgcn-mesa-mesa3d_devices none )
set( amdgcn-amd-amdhsa-llvm_devices none ) set( amdgcn-amd-amdhsa-llvm_devices none )
@@ -253,7 +249,6 @@ set_source_files_properties(
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_exp2.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_exp2.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_exp.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_exp.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_log10.cl ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_log10.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/r600/math/clc_native_rsqrt.cl
# OpenCL builtins # OpenCL builtins
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_cos.cl ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_cos.cl
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_divide.cl ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_divide.cl
@@ -283,7 +278,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
set( opencl_dirs ) set( opencl_dirs )
if( ${ARCH} STREQUAL r600 OR ${ARCH} STREQUAL amdgcn ) if( ${ARCH} STREQUAL amdgcn )
list( APPEND opencl_dirs amdgpu ) list( APPEND opencl_dirs amdgpu )
endif() endif()
@@ -414,7 +409,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# maps to the private address space. # maps to the private address space.
set ( private_addrspace_val 0 ) set ( private_addrspace_val 0 )
set ( generic_addrspace_val 0 ) set ( generic_addrspace_val 0 )
if( ARCH STREQUAL amdgcn OR ARCH STREQUAL r600 OR ARCH STREQUAL amdgcn-amdhsa ) if( ARCH STREQUAL amdgcn-amdhsa )
set ( private_addrspace_val 5 ) set ( private_addrspace_val 5 )
endif() endif()
if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64) if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64)

View File

@@ -24,11 +24,7 @@
#define PNOR 0x100 #define PNOR 0x100
#define PINF 0x200 #define PINF 0x200
#ifdef __R600__
#define __CLC_HAVE_HW_FMA32() (0)
#else
#define __CLC_HAVE_HW_FMA32() (1) #define __CLC_HAVE_HW_FMA32() (1)
#endif
#define HAVE_BITALIGN() (0) #define HAVE_BITALIGN() (0)
#define HAVE_FAST_FMA32() (0) #define HAVE_FAST_FMA32() (0)

View File

@@ -16,19 +16,13 @@
#pragma OPENCL EXTENSION cl_khr_fp64 : enable #pragma OPENCL EXTENSION cl_khr_fp64 : enable
#ifdef __AMDGCN__
#define __clc_builtin_rsq __builtin_amdgcn_rsq
#else
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
#endif
_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) { _CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
uint vcc = x < 0x1p-767; uint vcc = x < 0x1p-767;
uint exp0 = vcc ? 0x100 : 0; uint exp0 = vcc ? 0x100 : 0;
unsigned exp1 = vcc ? 0xffffff80 : 0; unsigned exp1 = vcc ? 0xffffff80 : 0;
double v01 = __clc_ldexp(x, exp0); double v01 = __clc_ldexp(x, exp0);
double v23 = __clc_builtin_rsq(v01); double v23 = __builtin_amdgcn_rsq(v01);
double v45 = v01 * v23; double v45 = v01 * v23;
v23 = v23 * 0.5; v23 = v23 * 0.5;

View File

@@ -1,4 +0,0 @@
math/clc_fma.cl
math/clc_native_rsqrt.cl
math/clc_rsqrt.cl
math/clc_sw_fma.cl

View File

@@ -1,14 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/internal/clc.h>
#include <clc/internal/math/clc_sw_fma.h>
#include <clc/math/math.h>
#define __CLC_BODY <clc_fma.inc>
#include <clc/math/gentype.inc>

View File

@@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_fma(__CLC_GENTYPE a, __CLC_GENTYPE b,
__CLC_GENTYPE c) {
#if __CLC_FPSIZE == 32
return __clc_sw_fma(a, b, c);
#else
return __builtin_elementwise_fma(a, b, c);
#endif
}

View File

@@ -1,18 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/internal/clc.h>
_CLC_OVERLOAD _CLC_DEF float __clc_native_rsqrt(float x) {
return __builtin_r600_recipsqrt_ieeef(x);
}
#define __CLC_FLOAT_ONLY
#define __CLC_FUNCTION __clc_native_rsqrt
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
#include <clc/math/gentype.inc>

View File

@@ -1,27 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/internal/clc.h>
_CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) {
return __builtin_r600_recipsqrt_ieeef(x);
}
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
_CLC_OVERLOAD _CLC_DEF double __clc_rsqrt(double x) {
return __builtin_r600_recipsqrt_ieee(x);
}
#endif // cl_khr_fp64
#define __CLC_FUNCTION __clc_rsqrt
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
#include <clc/math/gentype.inc>

View File

@@ -1,174 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/clc_as_type.h>
#include <clc/float/definitions.h>
#include <clc/integer/clc_abs.h>
#include <clc/integer/clc_clz.h>
#include <clc/integer/definitions.h>
#include <clc/internal/clc.h>
#include <clc/math/clc_mad.h>
#include <clc/math/math.h>
#include <clc/relational/clc_isinf.h>
#include <clc/relational/clc_isnan.h>
#include <clc/shared/clc_max.h>
static _CLC_INLINE float __clc_flush_denormal(float x) {
int ix = __clc_as_int(x);
if (((ix & EXPBITS_SP32) == 0) && ((ix & MANTBITS_SP32) != 0)) {
ix &= SIGNBIT_SP32;
x = __clc_as_float(ix);
}
return x;
}
struct fp {
ulong mantissa;
int exponent;
uint sign;
};
_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
/* special cases */
if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) ||
__clc_isinf(b)) {
return __clc_mad(a, b, c);
}
/* If only c is inf, and both a,b are regular numbers, the result is c*/
if (__clc_isinf(c)) {
return c;
}
a = __clc_flush_denormal(a);
b = __clc_flush_denormal(b);
c = __clc_flush_denormal(c);
if (c == 0) {
return a * b;
}
struct fp st_a, st_b, st_c;
st_a.exponent = a == .0f ? 0 : ((__clc_as_uint(a) & 0x7f800000) >> 23) - 127;
st_b.exponent = b == .0f ? 0 : ((__clc_as_uint(b) & 0x7f800000) >> 23) - 127;
st_c.exponent = c == .0f ? 0 : ((__clc_as_uint(c) & 0x7f800000) >> 23) - 127;
st_a.mantissa = a == .0f ? 0 : (__clc_as_uint(a) & 0x7fffff) | 0x800000;
st_b.mantissa = b == .0f ? 0 : (__clc_as_uint(b) & 0x7fffff) | 0x800000;
st_c.mantissa = c == .0f ? 0 : (__clc_as_uint(c) & 0x7fffff) | 0x800000;
st_a.sign = __clc_as_uint(a) & 0x80000000;
st_b.sign = __clc_as_uint(b) & 0x80000000;
st_c.sign = __clc_as_uint(c) & 0x80000000;
// Multiplication.
// Move the product to the highest bits to maximize precision
// mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
// Add one bit for future addition overflow,
// add another bit to detect subtraction underflow
struct fp st_mul;
st_mul.sign = st_a.sign ^ st_b.sign;
st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
// FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
if (st_mul.exponent == 0 && st_mul.mantissa == 0)
return c;
// Mantissa is 23 fractional bits, shift it the same way as product mantissa
#define C_ADJUST 37ul
// both exponents are bias adjusted
int exp_diff = st_mul.exponent - st_c.exponent;
st_c.mantissa <<= C_ADJUST;
ulong cutoff_bits = 0;
ulong cutoff_mask = (1ul << __clc_abs(exp_diff)) - 1ul;
if (exp_diff > 0) {
cutoff_bits =
exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
} else {
cutoff_bits =
-exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
}
struct fp st_fma;
st_fma.sign = st_mul.sign;
st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent);
if (st_c.sign == st_mul.sign) {
st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
} else {
// cutoff bits borrow one
st_fma.mantissa =
st_mul.mantissa - st_c.mantissa -
(cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
}
// underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
if (st_fma.mantissa > LONG_MAX) {
st_fma.mantissa = 0 - st_fma.mantissa;
st_fma.sign = st_mul.sign ^ 0x80000000;
}
// detect overflow/underflow
int overflow_bits = 3 - __clc_clz(st_fma.mantissa);
// adjust exponent
st_fma.exponent += overflow_bits;
// handle underflow
if (overflow_bits < 0) {
st_fma.mantissa <<= -overflow_bits;
overflow_bits = 0;
}
// rounding
ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
// round to nearest even
if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0)) {
st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
}
// Shift mantissa back to bit 23
st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
// Detect rounding overflow
if (st_fma.mantissa > 0xffffff) {
++st_fma.exponent;
st_fma.mantissa >>= 1;
}
if (st_fma.mantissa == 0) {
return .0f;
}
// Flating point range limit
if (st_fma.exponent > 127) {
return __clc_as_float(__clc_as_uint(INFINITY) | st_fma.sign);
}
// Flush denormals
if (st_fma.exponent <= -127) {
return __clc_as_float(st_fma.sign);
}
return __clc_as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
((uint)st_fma.mantissa & 0x7fffff));
}
#define __CLC_FLOAT_ONLY
#define __CLC_FUNCTION __clc_sw_fma
#define __CLC_BODY <clc/shared/ternary_def_scalarize.inc>
#include <clc/math/gentype.inc>

View File

@@ -1,8 +0,0 @@
synchronization/barrier.cl
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.cl
workitem/get_local_id.cl
workitem/get_local_size.cl
workitem/get_num_groups.cl
workitem/get_work_dim.cl

View File

@@ -1,15 +0,0 @@
image/get_image_dim.cl
image/get_image_width.cl
image/get_image_height.cl
image/get_image_depth.cl
image/get_image_channel_data_type.cl
image/get_image_channel_order.cl
image/get_image_attributes_impl.ll
image/read_imagef.cl
image/read_imagei.cl
image/read_imageui.cl
image/read_image_impl.ll
image/write_imagef.cl
image/write_imagei.cl
image/write_imageui.cl
image/write_image_impl.ll

View File

@@ -1,95 +0,0 @@
;;===----------------------------------------------------------------------===;;
;
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
;;===----------------------------------------------------------------------===;;
%opencl.image2d_t = type opaque
%opencl.image3d_t = type opaque
declare i32 @llvm.OpenCL.image.get.resource.id.2d(
%opencl.image2d_t addrspace(1)*) nounwind readnone
declare i32 @llvm.OpenCL.image.get.resource.id.3d(
%opencl.image3d_t addrspace(1)*) nounwind readnone
declare [3 x i32] @llvm.OpenCL.image.get.size.2d(
%opencl.image2d_t addrspace(1)*) nounwind readnone
declare [3 x i32] @llvm.OpenCL.image.get.size.3d(
%opencl.image3d_t addrspace(1)*) nounwind readnone
declare [2 x i32] @llvm.OpenCL.image.get.format.2d(
%opencl.image2d_t addrspace(1)*) nounwind readnone
declare [2 x i32] @llvm.OpenCL.image.get.format.3d(
%opencl.image3d_t addrspace(1)*) nounwind readnone
define i32 @__clc_get_image_width_2d(
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
%opencl.image2d_t addrspace(1)* %img)
%2 = extractvalue [3 x i32] %1, 0
ret i32 %2
}
define i32 @__clc_get_image_width_3d(
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
%opencl.image3d_t addrspace(1)* %img)
%2 = extractvalue [3 x i32] %1, 0
ret i32 %2
}
define i32 @__clc_get_image_height_2d(
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
%opencl.image2d_t addrspace(1)* %img)
%2 = extractvalue [3 x i32] %1, 1
ret i32 %2
}
define i32 @__clc_get_image_height_3d(
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
%opencl.image3d_t addrspace(1)* %img)
%2 = extractvalue [3 x i32] %1, 1
ret i32 %2
}
define i32 @__clc_get_image_depth_3d(
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
%opencl.image3d_t addrspace(1)* %img)
%2 = extractvalue [3 x i32] %1, 2
ret i32 %2
}
define i32 @__clc_get_image_channel_data_type_2d(
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
%opencl.image2d_t addrspace(1)* %img)
%2 = extractvalue [2 x i32] %1, 0
ret i32 %2
}
define i32 @__clc_get_image_channel_data_type_3d(
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
%opencl.image3d_t addrspace(1)* %img)
%2 = extractvalue [2 x i32] %1, 0
ret i32 %2
}
define i32 @__clc_get_image_channel_order_2d(
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
%opencl.image2d_t addrspace(1)* %img)
%2 = extractvalue [2 x i32] %1, 1
ret i32 %2
}
define i32 @__clc_get_image_channel_order_3d(
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
%opencl.image3d_t addrspace(1)* %img)
%2 = extractvalue [2 x i32] %1, 1
ret i32 %2
}
attributes #0 = { nounwind readnone alwaysinline }

View File

@@ -1,19 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t);
_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t);
_CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image2d_t image) {
return __clc_get_image_channel_data_type_2d(image);
}
_CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image3d_t image) {
return __clc_get_image_channel_data_type_3d(image);
}

View File

@@ -1,19 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t);
_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t);
_CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image2d_t image) {
return __clc_get_image_channel_order_2d(image);
}
_CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image3d_t image) {
return __clc_get_image_channel_order_3d(image);
}

View File

@@ -1,15 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL int __clc_get_image_depth_3d(image3d_t);
_CLC_OVERLOAD _CLC_DEF int get_image_depth(image3d_t image) {
return __clc_get_image_depth_3d(image);
}

View File

@@ -1,17 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_OVERLOAD _CLC_DEF int2 get_image_dim(image2d_t image) {
return (int2)(get_image_width(image), get_image_height(image));
}
_CLC_OVERLOAD _CLC_DEF int4 get_image_dim(image3d_t image) {
return (int4)(get_image_width(image), get_image_height(image),
get_image_depth(image), 0);
}

View File

@@ -1,19 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL int __clc_get_image_height_2d(image2d_t);
_CLC_DECL int __clc_get_image_height_3d(image3d_t);
_CLC_OVERLOAD _CLC_DEF int get_image_height(image2d_t image) {
return __clc_get_image_height_2d(image);
}
_CLC_OVERLOAD _CLC_DEF int get_image_height(image3d_t image) {
return __clc_get_image_height_3d(image);
}

View File

@@ -1,19 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL int __clc_get_image_width_2d(image2d_t);
_CLC_DECL int __clc_get_image_width_3d(image3d_t);
_CLC_OVERLOAD _CLC_DEF int get_image_width(image2d_t image) {
return __clc_get_image_width_2d(image);
}
_CLC_OVERLOAD _CLC_DEF int get_image_width(image3d_t image) {
return __clc_get_image_width_3d(image);
}

View File

@@ -1,54 +0,0 @@
;;===----------------------------------------------------------------------===;;
;
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
;;===----------------------------------------------------------------------===;;
%opencl.image2d_t = type opaque
declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32,
i32, i32, i32) readnone
declare i32 @llvm.OpenCL.image.get.resource.id.2d(
%opencl.image2d_t addrspace(1)*) nounwind readnone
declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone
define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline {
%e0 = extractelement <2 x float> %v, i32 0
%e1 = extractelement <2 x float> %v, i32 1
%res.0 = insertelement <4 x float> poison, float %e0, i32 0
%res.1 = insertelement <4 x float> %res.0, float %e1, i32 1
%res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2
%res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3
ret <4 x float> %res.3
}
define <4 x float> @__clc_read_imagef_tex(
%opencl.image2d_t addrspace(1)* nocapture %img,
i32 %sampler, <2 x float> %coord) alwaysinline {
entry:
%coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord)
%smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler)
%img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
%opencl.image2d_t addrspace(1)* %img)
%tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved.
%coord_norm = and i32 %sampler, 1
%is_norm = icmp eq i32 %coord_norm, 1
br i1 %is_norm, label %NormCoord, label %UnnormCoord
NormCoord:
%data.norm = call <4 x float> @llvm.R600.tex(
<4 x float> %coord_v4,
i32 0, i32 0, i32 0, ; Offset.
i32 2, i32 %smp_id,
i32 1, i32 1, i32 1, i32 1) ; Normalized coords.
ret <4 x float> %data.norm
UnnormCoord:
%data.unnorm = call <4 x float> @llvm.R600.tex(
<4 x float> %coord_v4,
i32 0, i32 0, i32 0, ; Offset.
i32 %tex_id, i32 %smp_id,
i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords.
ret <4 x float> %data.unnorm
}

View File

@@ -1,22 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
int2 coord) {
float2 coord_float = (float2)(coord.x, coord.y);
return __clc_read_imagef_tex(image, sampler, coord_float);
}
_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
float2 coord) {
return __clc_read_imagef_tex(image, sampler, coord);
}

View File

@@ -1,31 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
int4 __clc_reinterpret_v4f_to_v4i(float4 v) {
union {
int4 v4i;
float4 v4f;
} res = {.v4f = v};
return res.v4i;
}
_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
int2 coord) {
float2 coord_float = (float2)(coord.x, coord.y);
return __clc_reinterpret_v4f_to_v4i(
__clc_read_imagef_tex(image, sampler, coord_float));
}
_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
float2 coord) {
return __clc_reinterpret_v4f_to_v4i(
__clc_read_imagef_tex(image, sampler, coord));
}

View File

@@ -1,31 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) {
union {
uint4 v4ui;
float4 v4f;
} res = {.v4f = v};
return res.v4ui;
}
_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
int2 coord) {
float2 coord_float = (float2)(coord.x, coord.y);
return __clc_reinterpret_v4f_to_v4ui(
__clc_read_imagef_tex(image, sampler, coord_float));
}
_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
float2 coord) {
return __clc_reinterpret_v4f_to_v4ui(
__clc_read_imagef_tex(image, sampler, coord));
}

View File

@@ -1,60 +0,0 @@
;;===----------------------------------------------------------------------===;;
;
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
;;===----------------------------------------------------------------------===;;
%opencl.image2d_t = type opaque
%opencl.image3d_t = type opaque
declare i32 @llvm.OpenCL.image.get.resource.id.2d(
%opencl.image2d_t addrspace(1)*) nounwind readnone
declare i32 @llvm.OpenCL.image.get.resource.id.3d(
%opencl.image3d_t addrspace(1)*) nounwind readnone
declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id)
define void @__clc_write_imageui_2d(
%opencl.image2d_t addrspace(1)* nocapture %img,
<2 x i32> %coord, <4 x i32> %color) #0 {
; Coordinate int2 -> int4.
%e0 = extractelement <2 x i32> %coord, i32 0
%e1 = extractelement <2 x i32> %coord, i32 1
%coord.0 = insertelement <4 x i32> poison, i32 %e0, i32 0
%coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1
%coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2
%coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3
; Get RAT ID.
%img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
%opencl.image2d_t addrspace(1)* %img)
%rat_id = add i32 %img_id, 1
; Call store intrinsic.
call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id)
ret void
}
define void @__clc_write_imagei_2d(
%opencl.image2d_t addrspace(1)* nocapture %img,
<2 x i32> %coord, <4 x i32> %color) #0 {
call void @__clc_write_imageui_2d(
%opencl.image2d_t addrspace(1)* nocapture %img,
<2 x i32> %coord, <4 x i32> %color)
ret void
}
define void @__clc_write_imagef_2d(
%opencl.image2d_t addrspace(1)* nocapture %img,
<2 x i32> %coord, <4 x float> %color) #0 {
%color.i32 = bitcast <4 x float> %color to <4 x i32>
call void @__clc_write_imageui_2d(
%opencl.image2d_t addrspace(1)* nocapture %img,
<2 x i32> %coord, <4 x i32> %color.i32)
ret void
}
attributes #0 = { alwaysinline }

View File

@@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color);
_CLC_OVERLOAD _CLC_DEF void write_imagef(image2d_t image, int2 coord,
float4 color) {
__clc_write_imagef_2d(image, coord, color);
}

View File

@@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color);
_CLC_OVERLOAD _CLC_DEF void write_imagei(image2d_t image, int2 coord,
int4 color) {
__clc_write_imagei_2d(image, coord, color);
}

View File

@@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color);
_CLC_OVERLOAD _CLC_DEF void write_imageui(image2d_t image, int2 coord,
uint4 color) {
__clc_write_imageui_2d(image, coord, color);
}

View File

@@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier");
_CLC_DEF _CLC_OVERLOAD void barrier(uint flags) {
// We should call mem_fence here, but that is not implemented for r600 yet
__clc_r600_barrier();
}

View File

@@ -1,18 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DEF _CLC_OVERLOAD uint get_global_offset(uint dim) {
__attribute__((address_space(7))) uint *ptr =
(__attribute__((address_space(7)))
uint *)__builtin_r600_implicitarg_ptr();
if (dim < 3)
return ptr[dim + 1];
return 0;
}

View File

@@ -1,26 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
switch (dim) {
case 0:
return __clc_r600_get_global_size_x();
case 1:
return __clc_r600_get_global_size_y();
case 2:
return __clc_r600_get_global_size_z();
default:
return 1;
}
}

View File

@@ -1,22 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DEF _CLC_OVERLOAD uint get_group_id(uint dim) {
switch (dim) {
case 0:
return __builtin_r600_read_tgid_x();
case 1:
return __builtin_r600_read_tgid_y();
case 2:
return __builtin_r600_read_tgid_z();
default:
return 1;
}
}

View File

@@ -1,22 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DEF _CLC_OVERLOAD uint get_local_id(uint dim) {
switch (dim) {
case 0:
return __builtin_r600_read_tidig_x();
case 1:
return __builtin_r600_read_tidig_y();
case 2:
return __builtin_r600_read_tidig_z();
default:
return 1;
}
}

View File

@@ -1,26 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
switch (dim) {
case 0:
return __clc_r600_get_local_size_x();
case 1:
return __clc_r600_get_local_size_y();
case 2:
return __clc_r600_get_local_size_z();
default:
return 1;
}
}

View File

@@ -1,26 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
switch (dim) {
case 0:
return __clc_r600_get_num_groups_x();
case 1:
return __clc_r600_get_num_groups_y();
case 2:
return __clc_r600_get_num_groups_z();
default:
return 1;
}
}

View File

@@ -1,16 +0,0 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <clc/opencl/opencl-base.h>
_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
__attribute__((address_space(7))) uint *ptr =
(__attribute__((address_space(7)))
uint *)__builtin_r600_implicitarg_ptr();
return ptr[0];
}

View File

@@ -37,8 +37,8 @@ granularity of individual functions.
</p> </p>
<p> <p>
libclc currently supports the AMDGCN, and R600 and NVPTX targets, but libclc currently supports the AMDGCN and NVPTX targets, but support
support for more targets is welcome. for more targets is welcome.
</p> </p>
<h2>Download</h2> <h2>Download</h2>