libclc: Remove r600 support (#181976)
This commit is contained in:
@@ -24,7 +24,6 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
|
||||
opencl/lib/clspv/SOURCES;
|
||||
opencl/lib/generic/SOURCES;
|
||||
opencl/lib/ptx-nvidiacl/SOURCES;
|
||||
opencl/lib/r600/SOURCES;
|
||||
opencl/lib/spirv/SOURCES;
|
||||
# CLC internal libraries
|
||||
clc/lib/generic/SOURCES;
|
||||
@@ -32,7 +31,6 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
|
||||
clc/lib/amdgpu/SOURCES;
|
||||
clc/lib/clspv/SOURCES;
|
||||
clc/lib/ptx-nvidiacl/SOURCES;
|
||||
clc/lib/r600/SOURCES;
|
||||
clc/lib/spirv/SOURCES;
|
||||
)
|
||||
|
||||
@@ -151,7 +149,6 @@ set( LIBCLC_TARGETS_ALL
|
||||
amdgcn-amd-amdhsa-llvm
|
||||
clspv--
|
||||
clspv64--
|
||||
r600--
|
||||
nvptx64--
|
||||
nvptx64--nvidiacl
|
||||
nvptx64-nvidia-cuda
|
||||
@@ -198,7 +195,6 @@ list( SORT LIBCLC_TARGETS_TO_BUILD )
|
||||
include_directories( ${LLVM_INCLUDE_DIRS} )
|
||||
|
||||
# Setup arch devices
|
||||
set( r600--_devices cedar cypress barts cayman )
|
||||
set( amdgcn--_devices none )
|
||||
set( amdgcn-mesa-mesa3d_devices none )
|
||||
set( amdgcn-amd-amdhsa-llvm_devices none )
|
||||
@@ -253,7 +249,6 @@ set_source_files_properties(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_exp2.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_exp.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/amdgpu/math/clc_native_log10.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/r600/math/clc_native_rsqrt.cl
|
||||
# OpenCL builtins
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_cos.cl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/math/native_divide.cl
|
||||
@@ -283,7 +278,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
|
||||
|
||||
set( opencl_dirs )
|
||||
|
||||
if( ${ARCH} STREQUAL r600 OR ${ARCH} STREQUAL amdgcn )
|
||||
if( ${ARCH} STREQUAL amdgcn )
|
||||
list( APPEND opencl_dirs amdgpu )
|
||||
endif()
|
||||
|
||||
@@ -414,7 +409,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
|
||||
# maps to the private address space.
|
||||
set ( private_addrspace_val 0 )
|
||||
set ( generic_addrspace_val 0 )
|
||||
if( ARCH STREQUAL amdgcn OR ARCH STREQUAL r600 OR ARCH STREQUAL amdgcn-amdhsa )
|
||||
if( ARCH STREQUAL amdgcn-amdhsa )
|
||||
set ( private_addrspace_val 5 )
|
||||
endif()
|
||||
if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64)
|
||||
|
||||
@@ -24,11 +24,7 @@
|
||||
#define PNOR 0x100
|
||||
#define PINF 0x200
|
||||
|
||||
#ifdef __R600__
|
||||
#define __CLC_HAVE_HW_FMA32() (0)
|
||||
#else
|
||||
#define __CLC_HAVE_HW_FMA32() (1)
|
||||
#endif
|
||||
|
||||
#define HAVE_BITALIGN() (0)
|
||||
#define HAVE_FAST_FMA32() (0)
|
||||
|
||||
@@ -16,19 +16,13 @@
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
#ifdef __AMDGCN__
|
||||
#define __clc_builtin_rsq __builtin_amdgcn_rsq
|
||||
#else
|
||||
#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) {
|
||||
uint vcc = x < 0x1p-767;
|
||||
uint exp0 = vcc ? 0x100 : 0;
|
||||
unsigned exp1 = vcc ? 0xffffff80 : 0;
|
||||
|
||||
double v01 = __clc_ldexp(x, exp0);
|
||||
double v23 = __clc_builtin_rsq(v01);
|
||||
double v23 = __builtin_amdgcn_rsq(v01);
|
||||
double v45 = v01 * v23;
|
||||
v23 = v23 * 0.5;
|
||||
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
math/clc_fma.cl
|
||||
math/clc_native_rsqrt.cl
|
||||
math/clc_rsqrt.cl
|
||||
math/clc_sw_fma.cl
|
||||
@@ -1,14 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/internal/math/clc_sw_fma.h>
|
||||
#include <clc/math/math.h>
|
||||
|
||||
#define __CLC_BODY <clc_fma.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,16 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_fma(__CLC_GENTYPE a, __CLC_GENTYPE b,
|
||||
__CLC_GENTYPE c) {
|
||||
#if __CLC_FPSIZE == 32
|
||||
return __clc_sw_fma(a, b, c);
|
||||
#else
|
||||
return __builtin_elementwise_fma(a, b, c);
|
||||
#endif
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float __clc_native_rsqrt(float x) {
|
||||
return __builtin_r600_recipsqrt_ieeef(x);
|
||||
}
|
||||
|
||||
#define __CLC_FLOAT_ONLY
|
||||
#define __CLC_FUNCTION __clc_native_rsqrt
|
||||
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,27 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/internal/clc.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float __clc_rsqrt(float x) {
|
||||
return __builtin_r600_recipsqrt_ieeef(x);
|
||||
}
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double __clc_rsqrt(double x) {
|
||||
return __builtin_r600_recipsqrt_ieee(x);
|
||||
}
|
||||
|
||||
#endif // cl_khr_fp64
|
||||
|
||||
#define __CLC_FUNCTION __clc_rsqrt
|
||||
#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,174 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/clc_as_type.h>
|
||||
#include <clc/float/definitions.h>
|
||||
#include <clc/integer/clc_abs.h>
|
||||
#include <clc/integer/clc_clz.h>
|
||||
#include <clc/integer/definitions.h>
|
||||
#include <clc/internal/clc.h>
|
||||
#include <clc/math/clc_mad.h>
|
||||
#include <clc/math/math.h>
|
||||
#include <clc/relational/clc_isinf.h>
|
||||
#include <clc/relational/clc_isnan.h>
|
||||
#include <clc/shared/clc_max.h>
|
||||
|
||||
static _CLC_INLINE float __clc_flush_denormal(float x) {
|
||||
int ix = __clc_as_int(x);
|
||||
if (((ix & EXPBITS_SP32) == 0) && ((ix & MANTBITS_SP32) != 0)) {
|
||||
ix &= SIGNBIT_SP32;
|
||||
x = __clc_as_float(ix);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
struct fp {
|
||||
ulong mantissa;
|
||||
int exponent;
|
||||
uint sign;
|
||||
};
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) {
|
||||
/* special cases */
|
||||
if (__clc_isnan(a) || __clc_isnan(b) || __clc_isnan(c) || __clc_isinf(a) ||
|
||||
__clc_isinf(b)) {
|
||||
return __clc_mad(a, b, c);
|
||||
}
|
||||
|
||||
/* If only c is inf, and both a,b are regular numbers, the result is c*/
|
||||
if (__clc_isinf(c)) {
|
||||
return c;
|
||||
}
|
||||
|
||||
a = __clc_flush_denormal(a);
|
||||
b = __clc_flush_denormal(b);
|
||||
c = __clc_flush_denormal(c);
|
||||
|
||||
if (c == 0) {
|
||||
return a * b;
|
||||
}
|
||||
|
||||
struct fp st_a, st_b, st_c;
|
||||
|
||||
st_a.exponent = a == .0f ? 0 : ((__clc_as_uint(a) & 0x7f800000) >> 23) - 127;
|
||||
st_b.exponent = b == .0f ? 0 : ((__clc_as_uint(b) & 0x7f800000) >> 23) - 127;
|
||||
st_c.exponent = c == .0f ? 0 : ((__clc_as_uint(c) & 0x7f800000) >> 23) - 127;
|
||||
|
||||
st_a.mantissa = a == .0f ? 0 : (__clc_as_uint(a) & 0x7fffff) | 0x800000;
|
||||
st_b.mantissa = b == .0f ? 0 : (__clc_as_uint(b) & 0x7fffff) | 0x800000;
|
||||
st_c.mantissa = c == .0f ? 0 : (__clc_as_uint(c) & 0x7fffff) | 0x800000;
|
||||
|
||||
st_a.sign = __clc_as_uint(a) & 0x80000000;
|
||||
st_b.sign = __clc_as_uint(b) & 0x80000000;
|
||||
st_c.sign = __clc_as_uint(c) & 0x80000000;
|
||||
|
||||
// Multiplication.
|
||||
// Move the product to the highest bits to maximize precision
|
||||
// mantissa is 24 bits => product is 48 bits, 2bits non-fraction.
|
||||
// Add one bit for future addition overflow,
|
||||
// add another bit to detect subtraction underflow
|
||||
struct fp st_mul;
|
||||
st_mul.sign = st_a.sign ^ st_b.sign;
|
||||
st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul;
|
||||
st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0;
|
||||
|
||||
// FIXME: Detecting a == 0 || b == 0 above crashed GCN isel
|
||||
if (st_mul.exponent == 0 && st_mul.mantissa == 0)
|
||||
return c;
|
||||
|
||||
// Mantissa is 23 fractional bits, shift it the same way as product mantissa
|
||||
#define C_ADJUST 37ul
|
||||
|
||||
// both exponents are bias adjusted
|
||||
int exp_diff = st_mul.exponent - st_c.exponent;
|
||||
|
||||
st_c.mantissa <<= C_ADJUST;
|
||||
ulong cutoff_bits = 0;
|
||||
ulong cutoff_mask = (1ul << __clc_abs(exp_diff)) - 1ul;
|
||||
if (exp_diff > 0) {
|
||||
cutoff_bits =
|
||||
exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask);
|
||||
st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff);
|
||||
} else {
|
||||
cutoff_bits =
|
||||
-exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask);
|
||||
st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff);
|
||||
}
|
||||
|
||||
struct fp st_fma;
|
||||
st_fma.sign = st_mul.sign;
|
||||
st_fma.exponent = __clc_max(st_mul.exponent, st_c.exponent);
|
||||
if (st_c.sign == st_mul.sign) {
|
||||
st_fma.mantissa = st_mul.mantissa + st_c.mantissa;
|
||||
} else {
|
||||
// cutoff bits borrow one
|
||||
st_fma.mantissa =
|
||||
st_mul.mantissa - st_c.mantissa -
|
||||
(cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0);
|
||||
}
|
||||
|
||||
// underflow: st_c.sign != st_mul.sign, and magnitude switches the sign
|
||||
if (st_fma.mantissa > LONG_MAX) {
|
||||
st_fma.mantissa = 0 - st_fma.mantissa;
|
||||
st_fma.sign = st_mul.sign ^ 0x80000000;
|
||||
}
|
||||
|
||||
// detect overflow/underflow
|
||||
int overflow_bits = 3 - __clc_clz(st_fma.mantissa);
|
||||
|
||||
// adjust exponent
|
||||
st_fma.exponent += overflow_bits;
|
||||
|
||||
// handle underflow
|
||||
if (overflow_bits < 0) {
|
||||
st_fma.mantissa <<= -overflow_bits;
|
||||
overflow_bits = 0;
|
||||
}
|
||||
|
||||
// rounding
|
||||
ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1;
|
||||
ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0);
|
||||
ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits));
|
||||
ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits));
|
||||
|
||||
// round to nearest even
|
||||
if ((trunc_bits > grs_bits) || (trunc_bits == grs_bits && last_bit != 0)) {
|
||||
st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits));
|
||||
}
|
||||
|
||||
// Shift mantissa back to bit 23
|
||||
st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits));
|
||||
|
||||
// Detect rounding overflow
|
||||
if (st_fma.mantissa > 0xffffff) {
|
||||
++st_fma.exponent;
|
||||
st_fma.mantissa >>= 1;
|
||||
}
|
||||
|
||||
if (st_fma.mantissa == 0) {
|
||||
return .0f;
|
||||
}
|
||||
|
||||
// Flating point range limit
|
||||
if (st_fma.exponent > 127) {
|
||||
return __clc_as_float(__clc_as_uint(INFINITY) | st_fma.sign);
|
||||
}
|
||||
|
||||
// Flush denormals
|
||||
if (st_fma.exponent <= -127) {
|
||||
return __clc_as_float(st_fma.sign);
|
||||
}
|
||||
|
||||
return __clc_as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) |
|
||||
((uint)st_fma.mantissa & 0x7fffff));
|
||||
}
|
||||
|
||||
#define __CLC_FLOAT_ONLY
|
||||
#define __CLC_FUNCTION __clc_sw_fma
|
||||
#define __CLC_BODY <clc/shared/ternary_def_scalarize.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
@@ -1,8 +0,0 @@
|
||||
synchronization/barrier.cl
|
||||
workitem/get_global_offset.cl
|
||||
workitem/get_group_id.cl
|
||||
workitem/get_global_size.cl
|
||||
workitem/get_local_id.cl
|
||||
workitem/get_local_size.cl
|
||||
workitem/get_num_groups.cl
|
||||
workitem/get_work_dim.cl
|
||||
@@ -1,15 +0,0 @@
|
||||
image/get_image_dim.cl
|
||||
image/get_image_width.cl
|
||||
image/get_image_height.cl
|
||||
image/get_image_depth.cl
|
||||
image/get_image_channel_data_type.cl
|
||||
image/get_image_channel_order.cl
|
||||
image/get_image_attributes_impl.ll
|
||||
image/read_imagef.cl
|
||||
image/read_imagei.cl
|
||||
image/read_imageui.cl
|
||||
image/read_image_impl.ll
|
||||
image/write_imagef.cl
|
||||
image/write_imagei.cl
|
||||
image/write_imageui.cl
|
||||
image/write_image_impl.ll
|
||||
@@ -1,95 +0,0 @@
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
;
|
||||
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
; See https://llvm.org/LICENSE.txt for license information.
|
||||
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
;
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
|
||||
%opencl.image2d_t = type opaque
|
||||
%opencl.image3d_t = type opaque
|
||||
|
||||
declare i32 @llvm.OpenCL.image.get.resource.id.2d(
|
||||
%opencl.image2d_t addrspace(1)*) nounwind readnone
|
||||
declare i32 @llvm.OpenCL.image.get.resource.id.3d(
|
||||
%opencl.image3d_t addrspace(1)*) nounwind readnone
|
||||
|
||||
declare [3 x i32] @llvm.OpenCL.image.get.size.2d(
|
||||
%opencl.image2d_t addrspace(1)*) nounwind readnone
|
||||
declare [3 x i32] @llvm.OpenCL.image.get.size.3d(
|
||||
%opencl.image3d_t addrspace(1)*) nounwind readnone
|
||||
|
||||
declare [2 x i32] @llvm.OpenCL.image.get.format.2d(
|
||||
%opencl.image2d_t addrspace(1)*) nounwind readnone
|
||||
declare [2 x i32] @llvm.OpenCL.image.get.format.3d(
|
||||
%opencl.image3d_t addrspace(1)*) nounwind readnone
|
||||
|
||||
define i32 @__clc_get_image_width_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
|
||||
%opencl.image2d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [3 x i32] %1, 0
|
||||
ret i32 %2
|
||||
}
|
||||
define i32 @__clc_get_image_width_3d(
|
||||
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
|
||||
%opencl.image3d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [3 x i32] %1, 0
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @__clc_get_image_height_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.2d(
|
||||
%opencl.image2d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [3 x i32] %1, 1
|
||||
ret i32 %2
|
||||
}
|
||||
define i32 @__clc_get_image_height_3d(
|
||||
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
|
||||
%opencl.image3d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [3 x i32] %1, 1
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @__clc_get_image_depth_3d(
|
||||
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [3 x i32] @llvm.OpenCL.image.get.size.3d(
|
||||
%opencl.image3d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [3 x i32] %1, 2
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @__clc_get_image_channel_data_type_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
|
||||
%opencl.image2d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [2 x i32] %1, 0
|
||||
ret i32 %2
|
||||
}
|
||||
define i32 @__clc_get_image_channel_data_type_3d(
|
||||
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
|
||||
%opencl.image3d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [2 x i32] %1, 0
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
define i32 @__clc_get_image_channel_order_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.2d(
|
||||
%opencl.image2d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [2 x i32] %1, 1
|
||||
ret i32 %2
|
||||
}
|
||||
define i32 @__clc_get_image_channel_order_3d(
|
||||
%opencl.image3d_t addrspace(1)* nocapture %img) #0 {
|
||||
%1 = tail call [2 x i32] @llvm.OpenCL.image.get.format.3d(
|
||||
%opencl.image3d_t addrspace(1)* %img)
|
||||
%2 = extractvalue [2 x i32] %1, 1
|
||||
ret i32 %2
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone alwaysinline }
|
||||
@@ -1,19 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL int __clc_get_image_channel_data_type_2d(image2d_t);
|
||||
_CLC_DECL int __clc_get_image_channel_data_type_3d(image3d_t);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image2d_t image) {
|
||||
return __clc_get_image_channel_data_type_2d(image);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_channel_data_type(image3d_t image) {
|
||||
return __clc_get_image_channel_data_type_3d(image);
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL int __clc_get_image_channel_order_2d(image2d_t);
|
||||
_CLC_DECL int __clc_get_image_channel_order_3d(image3d_t);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image2d_t image) {
|
||||
return __clc_get_image_channel_order_2d(image);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_channel_order(image3d_t image) {
|
||||
return __clc_get_image_channel_order_3d(image);
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL int __clc_get_image_depth_3d(image3d_t);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_depth(image3d_t image) {
|
||||
return __clc_get_image_depth_3d(image);
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int2 get_image_dim(image2d_t image) {
|
||||
return (int2)(get_image_width(image), get_image_height(image));
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF int4 get_image_dim(image3d_t image) {
|
||||
return (int4)(get_image_width(image), get_image_height(image),
|
||||
get_image_depth(image), 0);
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL int __clc_get_image_height_2d(image2d_t);
|
||||
_CLC_DECL int __clc_get_image_height_3d(image3d_t);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_height(image2d_t image) {
|
||||
return __clc_get_image_height_2d(image);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_height(image3d_t image) {
|
||||
return __clc_get_image_height_3d(image);
|
||||
}
|
||||
@@ -1,19 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL int __clc_get_image_width_2d(image2d_t);
|
||||
_CLC_DECL int __clc_get_image_width_3d(image3d_t);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_width(image2d_t image) {
|
||||
return __clc_get_image_width_2d(image);
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF int get_image_width(image3d_t image) {
|
||||
return __clc_get_image_width_3d(image);
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
;
|
||||
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
; See https://llvm.org/LICENSE.txt for license information.
|
||||
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
;
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
|
||||
%opencl.image2d_t = type opaque
|
||||
|
||||
declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32,
|
||||
i32, i32, i32) readnone
|
||||
declare i32 @llvm.OpenCL.image.get.resource.id.2d(
|
||||
%opencl.image2d_t addrspace(1)*) nounwind readnone
|
||||
declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) readnone
|
||||
|
||||
define <4 x float> @__clc_v4f_from_v2f(<2 x float> %v) alwaysinline {
|
||||
%e0 = extractelement <2 x float> %v, i32 0
|
||||
%e1 = extractelement <2 x float> %v, i32 1
|
||||
%res.0 = insertelement <4 x float> poison, float %e0, i32 0
|
||||
%res.1 = insertelement <4 x float> %res.0, float %e1, i32 1
|
||||
%res.2 = insertelement <4 x float> %res.1, float 0.0, i32 2
|
||||
%res.3 = insertelement <4 x float> %res.2, float 0.0, i32 3
|
||||
ret <4 x float> %res.3
|
||||
}
|
||||
|
||||
define <4 x float> @__clc_read_imagef_tex(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img,
|
||||
i32 %sampler, <2 x float> %coord) alwaysinline {
|
||||
entry:
|
||||
%coord_v4 = call <4 x float> @__clc_v4f_from_v2f(<2 x float> %coord)
|
||||
%smp_id = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %sampler)
|
||||
%img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
|
||||
%opencl.image2d_t addrspace(1)* %img)
|
||||
%tex_id = add i32 %img_id, 2 ; First 2 IDs are reserved.
|
||||
|
||||
%coord_norm = and i32 %sampler, 1
|
||||
%is_norm = icmp eq i32 %coord_norm, 1
|
||||
br i1 %is_norm, label %NormCoord, label %UnnormCoord
|
||||
NormCoord:
|
||||
%data.norm = call <4 x float> @llvm.R600.tex(
|
||||
<4 x float> %coord_v4,
|
||||
i32 0, i32 0, i32 0, ; Offset.
|
||||
i32 2, i32 %smp_id,
|
||||
i32 1, i32 1, i32 1, i32 1) ; Normalized coords.
|
||||
ret <4 x float> %data.norm
|
||||
UnnormCoord:
|
||||
%data.unnorm = call <4 x float> @llvm.R600.tex(
|
||||
<4 x float> %coord_v4,
|
||||
i32 0, i32 0, i32 0, ; Offset.
|
||||
i32 %tex_id, i32 %smp_id,
|
||||
i32 0, i32 0, i32 0, i32 0) ; Unnormalized coords.
|
||||
ret <4 x float> %data.unnorm
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
|
||||
int2 coord) {
|
||||
float2 coord_float = (float2)(coord.x, coord.y);
|
||||
return __clc_read_imagef_tex(image, sampler, coord_float);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF float4 read_imagef(image2d_t image, sampler_t sampler,
|
||||
float2 coord) {
|
||||
return __clc_read_imagef_tex(image, sampler, coord);
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
|
||||
|
||||
int4 __clc_reinterpret_v4f_to_v4i(float4 v) {
|
||||
union {
|
||||
int4 v4i;
|
||||
float4 v4f;
|
||||
} res = {.v4f = v};
|
||||
return res.v4i;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
|
||||
int2 coord) {
|
||||
float2 coord_float = (float2)(coord.x, coord.y);
|
||||
return __clc_reinterpret_v4f_to_v4i(
|
||||
__clc_read_imagef_tex(image, sampler, coord_float));
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF int4 read_imagei(image2d_t image, sampler_t sampler,
|
||||
float2 coord) {
|
||||
return __clc_reinterpret_v4f_to_v4i(
|
||||
__clc_read_imagef_tex(image, sampler, coord));
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL float4 __clc_read_imagef_tex(image2d_t, sampler_t, float2);
|
||||
|
||||
uint4 __clc_reinterpret_v4f_to_v4ui(float4 v) {
|
||||
union {
|
||||
uint4 v4ui;
|
||||
float4 v4f;
|
||||
} res = {.v4f = v};
|
||||
return res.v4ui;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
|
||||
int2 coord) {
|
||||
float2 coord_float = (float2)(coord.x, coord.y);
|
||||
return __clc_reinterpret_v4f_to_v4ui(
|
||||
__clc_read_imagef_tex(image, sampler, coord_float));
|
||||
}
|
||||
_CLC_OVERLOAD _CLC_DEF uint4 read_imageui(image2d_t image, sampler_t sampler,
|
||||
float2 coord) {
|
||||
return __clc_reinterpret_v4f_to_v4ui(
|
||||
__clc_read_imagef_tex(image, sampler, coord));
|
||||
}
|
||||
@@ -1,60 +0,0 @@
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
;
|
||||
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
; See https://llvm.org/LICENSE.txt for license information.
|
||||
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
;
|
||||
;;===----------------------------------------------------------------------===;;
|
||||
|
||||
%opencl.image2d_t = type opaque
|
||||
%opencl.image3d_t = type opaque
|
||||
|
||||
declare i32 @llvm.OpenCL.image.get.resource.id.2d(
|
||||
%opencl.image2d_t addrspace(1)*) nounwind readnone
|
||||
declare i32 @llvm.OpenCL.image.get.resource.id.3d(
|
||||
%opencl.image3d_t addrspace(1)*) nounwind readnone
|
||||
|
||||
declare void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord, i32 %rat_id)
|
||||
|
||||
define void @__clc_write_imageui_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img,
|
||||
<2 x i32> %coord, <4 x i32> %color) #0 {
|
||||
|
||||
; Coordinate int2 -> int4.
|
||||
%e0 = extractelement <2 x i32> %coord, i32 0
|
||||
%e1 = extractelement <2 x i32> %coord, i32 1
|
||||
%coord.0 = insertelement <4 x i32> poison, i32 %e0, i32 0
|
||||
%coord.1 = insertelement <4 x i32> %coord.0, i32 %e1, i32 1
|
||||
%coord.2 = insertelement <4 x i32> %coord.1, i32 0, i32 2
|
||||
%coord.3 = insertelement <4 x i32> %coord.2, i32 0, i32 3
|
||||
|
||||
; Get RAT ID.
|
||||
%img_id = call i32 @llvm.OpenCL.image.get.resource.id.2d(
|
||||
%opencl.image2d_t addrspace(1)* %img)
|
||||
%rat_id = add i32 %img_id, 1
|
||||
|
||||
; Call store intrinsic.
|
||||
call void @llvm.r600.rat.store.typed(<4 x i32> %color, <4 x i32> %coord.3, i32 %rat_id)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_write_imagei_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img,
|
||||
<2 x i32> %coord, <4 x i32> %color) #0 {
|
||||
call void @__clc_write_imageui_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img,
|
||||
<2 x i32> %coord, <4 x i32> %color)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @__clc_write_imagef_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img,
|
||||
<2 x i32> %coord, <4 x float> %color) #0 {
|
||||
%color.i32 = bitcast <4 x float> %color to <4 x i32>
|
||||
call void @__clc_write_imageui_2d(
|
||||
%opencl.image2d_t addrspace(1)* nocapture %img,
|
||||
<2 x i32> %coord, <4 x i32> %color.i32)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { alwaysinline }
|
||||
@@ -1,16 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL void __clc_write_imagef_2d(image2d_t image, int2 coord, float4 color);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF void write_imagef(image2d_t image, int2 coord,
|
||||
float4 color) {
|
||||
__clc_write_imagef_2d(image, coord, color);
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL void __clc_write_imagei_2d(image2d_t image, int2 coord, int4 color);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF void write_imagei(image2d_t image, int2 coord,
|
||||
int4 color) {
|
||||
__clc_write_imagei_2d(image, coord, color);
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DECL void __clc_write_imageui_2d(image2d_t image, int2 coord, uint4 color);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF void write_imageui(image2d_t image, int2 coord,
|
||||
uint4 color) {
|
||||
__clc_write_imageui_2d(image, coord, color);
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier");
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD void barrier(uint flags) {
|
||||
// We should call mem_fence here, but that is not implemented for r600 yet
|
||||
__clc_r600_barrier();
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD uint get_global_offset(uint dim) {
|
||||
__attribute__((address_space(7))) uint *ptr =
|
||||
(__attribute__((address_space(7)))
|
||||
uint *)__builtin_r600_implicitarg_ptr();
|
||||
if (dim < 3)
|
||||
return ptr[dim + 1];
|
||||
return 0;
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
|
||||
uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
|
||||
uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
|
||||
switch (dim) {
|
||||
case 0:
|
||||
return __clc_r600_get_global_size_x();
|
||||
case 1:
|
||||
return __clc_r600_get_global_size_y();
|
||||
case 2:
|
||||
return __clc_r600_get_global_size_z();
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD uint get_group_id(uint dim) {
|
||||
switch (dim) {
|
||||
case 0:
|
||||
return __builtin_r600_read_tgid_x();
|
||||
case 1:
|
||||
return __builtin_r600_read_tgid_y();
|
||||
case 2:
|
||||
return __builtin_r600_read_tgid_z();
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD uint get_local_id(uint dim) {
|
||||
switch (dim) {
|
||||
case 0:
|
||||
return __builtin_r600_read_tidig_x();
|
||||
case 1:
|
||||
return __builtin_r600_read_tidig_y();
|
||||
case 2:
|
||||
return __builtin_r600_read_tidig_z();
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
|
||||
uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
|
||||
uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
|
||||
switch (dim) {
|
||||
case 0:
|
||||
return __clc_r600_get_local_size_x();
|
||||
case 1:
|
||||
return __clc_r600_get_local_size_y();
|
||||
case 2:
|
||||
return __clc_r600_get_local_size_z();
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
|
||||
uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
|
||||
uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
|
||||
switch (dim) {
|
||||
case 0:
|
||||
return __clc_r600_get_num_groups_x();
|
||||
case 1:
|
||||
return __clc_r600_get_num_groups_y();
|
||||
case 2:
|
||||
return __clc_r600_get_num_groups_z();
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include <clc/opencl/opencl-base.h>
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
|
||||
__attribute__((address_space(7))) uint *ptr =
|
||||
(__attribute__((address_space(7)))
|
||||
uint *)__builtin_r600_implicitarg_ptr();
|
||||
return ptr[0];
|
||||
}
|
||||
@@ -37,8 +37,8 @@ granularity of individual functions.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
libclc currently supports the AMDGCN, and R600 and NVPTX targets, but
|
||||
support for more targets is welcome.
|
||||
libclc currently supports the AMDGCN and NVPTX targets, but support
|
||||
for more targets is welcome.
|
||||
</p>
|
||||
|
||||
<h2>Download</h2>
|
||||
|
||||
Reference in New Issue
Block a user