From 789ec8baad24c60eadee76f65619da35bfa32a89 Mon Sep 17 00:00:00 2001 From: GYDmedwin Date: Sat, 20 Nov 2021 00:18:32 +0800 Subject: [PATCH 01/14] [libclc] Add new function implementations in math. (#4864) In the math package, there are only atan.cl, cbrt.cl, cos.cl, sin.cl, and sqrt.cl files initially. The other files in the math package I submitted this time are all newly implemented. We are using the SYCL-CTS test set for testing, one of which is the math_builtin_api test. Only when these files are newly added can it be compiled and passed. --- libclc/amdgcn-amdhsa/libspirv/SOURCES | 47 +++++++++- libclc/amdgcn-amdhsa/libspirv/math/acos.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/acosh.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/asin.cl | 21 +++++ libclc/amdgcn-amdhsa/libspirv/math/asinh.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/atan2.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/atanh.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/ceil.cl | 19 ++++ .../amdgcn-amdhsa/libspirv/math/copysign.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/cos.cl | 14 ++- libclc/amdgcn-amdhsa/libspirv/math/cosh.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/cospi.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/erf.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/erfc.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/exp.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/exp10.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/exp2.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/expm1.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/fabs.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/fdim.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/floor.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/fmax.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/fmin.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/fmod.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/frexp.cl | 86 +++++++++++++++++++ libclc/amdgcn-amdhsa/libspirv/math/frexp.inc | 64 ++++++++++++++ libclc/amdgcn-amdhsa/libspirv/math/hypot.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl | 35 ++++++++ libclc/amdgcn-amdhsa/libspirv/math/lgamma.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/log.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/log10.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/log1p.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/logb.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/modf.cl | 60 +++++++++++++ .../amdgcn-amdhsa/libspirv/math/nextafter.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/pow.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/round.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/rsqrt.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/sin.cl | 13 ++- libclc/amdgcn-amdhsa/libspirv/math/sincos.cl | 60 +++++++++++++ libclc/amdgcn-amdhsa/libspirv/math/sinpi.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/sqrt.cl | 16 ++-- libclc/amdgcn-amdhsa/libspirv/math/tan.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/tanh.cl | 19 ++++ libclc/amdgcn-amdhsa/libspirv/math/trunc.cl | 19 ++++ 45 files changed, 1063 insertions(+), 18 deletions(-) create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/acos.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/acosh.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/asin.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/asinh.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/atan2.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/atanh.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/ceil.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/copysign.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/cosh.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/cospi.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/erf.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/erfc.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/exp.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/exp10.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/exp2.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/expm1.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/fabs.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/fdim.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/floor.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/fmax.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/fmin.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/fmod.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/frexp.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/frexp.inc create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/hypot.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/lgamma.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/log.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/log10.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/log1p.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/logb.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/modf.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/nextafter.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/pow.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/round.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/rsqrt.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/sincos.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/sinpi.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/tan.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/tanh.cl create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/trunc.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 1a6e3dfa7d3e7..2a0650facae28 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -1,11 +1,54 @@ atomic/loadstore_helpers.ll cl_khr_int64_extended_atomics/minmax_helpers.ll synchronization/barrier.cl +math/acos.cl +math/acosh.cl +math/asin.cl +math/asinh.cl +math/atan.cl +math/atan2.cl +math/atanh.cl +math/cbrt.cl +math/ceil.cl +math/copysign.cl math/cos.cl +math/cosh.cl +math/cospi.cl +math/erf.cl +math/erfc.cl +math/exp.cl +math/exp10.cl +math/exp2.cl +math/expm1.cl +math/fabs.cl +math/fdim.cl +math/floor.cl +math/fma.cl +math/fmax.cl +math/fmin.cl +math/fmod.cl +math/frexp.cl +math/hypot.cl +math/ilogb.cl +math/ldexp.cl +math/lgamma.cl +math/log.cl +math/log10.cl +math/log1p.cl +math/logb.cl +math/modf.cl +math/nextafter.cl +math/pow.cl +math/round.cl +math/rsqrt.cl math/sin.cl +math/sincos.cl +math/sinh.cl +math/sinpi.cl math/sqrt.cl -math/atan.cl -math/cbrt.cl +math/tan.cl +math/tanh.cl +math/trunc.cl workitem/get_global_size.cl workitem/get_local_size.cl workitem/get_num_groups.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/math/acos.cl b/libclc/amdgcn-amdhsa/libspirv/math/acos.cl new file mode 100644 index 0000000000000..a18feb52d332e --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/acos.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_acos_f64(double); +float __ocml_acos_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_acos +#define __CLC_BUILTIN __ocml_acos +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/acosh.cl b/libclc/amdgcn-amdhsa/libspirv/math/acosh.cl new file mode 100644 index 0000000000000..7eb8c824ba58c --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/acosh.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_acosh_f64(double); +float __ocml_acosh_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_acosh +#define __CLC_BUILTIN __ocml_acosh +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/asin.cl b/libclc/amdgcn-amdhsa/libspirv/math/asin.cl new file mode 100644 index 0000000000000..365f47819b204 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/asin.cl @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_asin_f64(double); +float __ocml_asin_f32(float); + + +#define __CLC_FUNCTION __spirv_ocl_asin +#define __CLC_BUILTIN __ocml_asin + +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/asinh.cl b/libclc/amdgcn-amdhsa/libspirv/math/asinh.cl new file mode 100644 index 0000000000000..8efcbc04bac71 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/asinh.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_asinh_f64(double); +float __ocml_asinh_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_asinh +#define __CLC_BUILTIN __ocml_asinh +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/atan2.cl b/libclc/amdgcn-amdhsa/libspirv/math/atan2.cl new file mode 100644 index 0000000000000..2a64cf8a80680 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/atan2.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_atan2_f64(double,double); +float __ocml_atan2_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_atan2 +#define __CLC_BUILTIN __ocml_atan2 +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/atanh.cl b/libclc/amdgcn-amdhsa/libspirv/math/atanh.cl new file mode 100644 index 0000000000000..91f34d2056361 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/atanh.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_atanh_f64(double); +float __ocml_atanh_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_atanh +#define __CLC_BUILTIN __ocml_atanh +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/ceil.cl b/libclc/amdgcn-amdhsa/libspirv/math/ceil.cl new file mode 100644 index 0000000000000..b9e36c26f8986 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/ceil.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_ceil_f64(double); +float __ocml_ceil_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_ceil +#define __CLC_BUILTIN __ocml_ceil +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/copysign.cl b/libclc/amdgcn-amdhsa/libspirv/math/copysign.cl new file mode 100644 index 0000000000000..473e86f9943a6 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/copysign.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_copysign_f64(double, double); +float __ocml_copysign_f32(float, float); + +#define __CLC_FUNCTION __spirv_ocl_copysign +#define __CLC_BUILTIN __ocml_copysign +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/cos.cl b/libclc/amdgcn-amdhsa/libspirv/math/cos.cl index 2a9fd49724ce0..b93ad3dcc38d6 100644 --- a/libclc/amdgcn-amdhsa/libspirv/math/cos.cl +++ b/libclc/amdgcn-amdhsa/libspirv/math/cos.cl @@ -6,9 +6,15 @@ // //===----------------------------------------------------------------------===// +#include #include -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t -__spirv_ocl_cos(__clc_fp32_t In) { - return __builtin_amdgcn_cosf(In); -} +double __ocml_cos_f64(double); + +float __ocml_cos_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_cos +#define __CLC_BUILTIN __ocml_cos +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/cosh.cl b/libclc/amdgcn-amdhsa/libspirv/math/cosh.cl new file mode 100644 index 0000000000000..1ad0f0427c269 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/cosh.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_cosh_f64(double); +float __ocml_cosh_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_cosh +#define __CLC_BUILTIN __ocml_cosh +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/cospi.cl b/libclc/amdgcn-amdhsa/libspirv/math/cospi.cl new file mode 100644 index 0000000000000..e20fe100152d5 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/cospi.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_cospi_f64(double); +float __ocml_cospi_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_cospi +#define __CLC_BUILTIN __ocml_cospi +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/erf.cl b/libclc/amdgcn-amdhsa/libspirv/math/erf.cl new file mode 100644 index 0000000000000..a0b5686ed134c --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/erf.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_erf_f64(double); +float __ocml_erf_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_erf +#define __CLC_BUILTIN __ocml_erf +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/erfc.cl b/libclc/amdgcn-amdhsa/libspirv/math/erfc.cl new file mode 100644 index 0000000000000..e3a97a8ad6efe --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/erfc.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_erfc_f64(double); +float __ocml_erfc_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_erfc +#define __CLC_BUILTIN __ocml_erfc +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/exp.cl b/libclc/amdgcn-amdhsa/libspirv/math/exp.cl new file mode 100644 index 0000000000000..9c324175cfa3c --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/exp.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_exp_f64(double); +float __ocml_exp_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_exp +#define __CLC_BUILTIN __ocml_exp +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/exp10.cl b/libclc/amdgcn-amdhsa/libspirv/math/exp10.cl new file mode 100644 index 0000000000000..22c22d6b2c1d9 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/exp10.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_exp10_f64(double); +float __ocml_exp10_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_exp10 +#define __CLC_BUILTIN __ocml_exp10 +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/exp2.cl b/libclc/amdgcn-amdhsa/libspirv/math/exp2.cl new file mode 100644 index 0000000000000..2f173e32c6308 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/exp2.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_exp2_f64(double); +float __ocml_exp2_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_exp2 +#define __CLC_BUILTIN __ocml_exp2 +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/expm1.cl b/libclc/amdgcn-amdhsa/libspirv/math/expm1.cl new file mode 100644 index 0000000000000..5d6d5b240dff8 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/expm1.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_expm1_f64(double); +float __ocml_expm1_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_expm1 +#define __CLC_BUILTIN __ocml_expm1 +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/fabs.cl b/libclc/amdgcn-amdhsa/libspirv/math/fabs.cl new file mode 100644 index 0000000000000..193c4cadea4d9 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/fabs.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_fabs_f64(double); +float __ocml_fabs_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_fabs +#define __CLC_BUILTIN __ocml_fabs +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/fdim.cl b/libclc/amdgcn-amdhsa/libspirv/math/fdim.cl new file mode 100644 index 0000000000000..11fbec26b65bb --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/fdim.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_fdim_f64(double,double); +float __ocml_fdim_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_fdim +#define __CLC_BUILTIN __ocml_fdim +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/floor.cl b/libclc/amdgcn-amdhsa/libspirv/math/floor.cl new file mode 100644 index 0000000000000..4d21cc7435898 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/floor.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_floor_f64(double); +float __ocml_floor_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_floor +#define __CLC_BUILTIN __ocml_floor +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/fmax.cl b/libclc/amdgcn-amdhsa/libspirv/math/fmax.cl new file mode 100644 index 0000000000000..08e3878885be3 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/fmax.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_fmax_f64(double,double); +float __ocml_fmax_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_fmax +#define __CLC_BUILTIN __ocml_fmax +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/fmin.cl b/libclc/amdgcn-amdhsa/libspirv/math/fmin.cl new file mode 100644 index 0000000000000..6f60e65f190d0 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/fmin.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_fmin_f64(double,double); +float __ocml_fmin_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_fmin +#define __CLC_BUILTIN __ocml_fmin +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/fmod.cl b/libclc/amdgcn-amdhsa/libspirv/math/fmod.cl new file mode 100644 index 0000000000000..60ea7dac0278e --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/fmod.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_fmod_f64(double,double); +float __ocml_fmod_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_fmod +#define __CLC_BUILTIN __ocml_fmod +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/frexp.cl b/libclc/amdgcn-amdhsa/libspirv/math/frexp.cl new file mode 100644 index 0000000000000..eb0256822acf1 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/frexp.cl @@ -0,0 +1,86 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_frexp_f64(double, int *); +float __ocml_frexp_f32(float, int *); + +_CLC_OVERLOAD _CLC_DEF float __clc_spirv_ocl_frexp(float x, private int *ep) { + return __ocml_frexp_f32(x, ep); +} + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double __clc_spirv_ocl_frexp(double x, private int *ep) { + return __ocml_frexp_f64(x, ep); +} + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_spirv_ocl_frexp(half x, private int *ep) { + float t = x; + return __ocml_frexp_f32(t, ep); +} + +#endif + +#define __CLC_ADDRESS_SPACE private +#define __CLC_GENTYPE float +#include +#undef __CLC_GENTYPE +#ifdef cl_khr_fp64 +#define __CLC_GENTYPE double +#include +#undef __CLC_GENTYPE +#endif +#ifdef cl_khr_fp16 +#define __CLC_GENTYPE half +#include +#undef __CLC_GENTYPE +#endif +#undef __CLC_ADDRESS_SPACE + +#define __CLC_ADDRESS_SPACE global +#define __CLC_GENTYPE float +#include +#undef __CLC_GENTYPE +#ifdef cl_khr_fp64 +#define __CLC_GENTYPE double +#include +#undef __CLC_GENTYPE +#endif +#ifdef cl_khr_fp16 +#define __CLC_GENTYPE half +#include +#undef __CLC_GENTYPE +#endif +#undef __CLC_ADDRESS_SPACE + +#define __CLC_ADDRESS_SPACE local +#define __CLC_GENTYPE float +#include +#undef __CLC_GENTYPE +#ifdef cl_khr_fp64 +#define __CLC_GENTYPE double +#include +#undef __CLC_GENTYPE +#endif +#ifdef cl_khr_fp16 +#define __CLC_GENTYPE half +#include +#undef __CLC_GENTYPE +#endif +#undef __CLC_ADDRESS_SPACE diff --git a/libclc/amdgcn-amdhsa/libspirv/math/frexp.inc b/libclc/amdgcn-amdhsa/libspirv/math/frexp.inc new file mode 100644 index 0000000000000..7c8409040772a --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/frexp.inc @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE +__spirv_ocl_frexp(__CLC_GENTYPE x, __CLC_ADDRESS_SPACE int *ep) { + int t_ep; + __CLC_GENTYPE res = __clc_spirv_ocl_frexp(x, &t_ep); + *ep = t_ep; + return res; +} + +#define __CLC_GENTYPE_VEC __CLC_XCONCAT(__CLC_GENTYPE, 2) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE_VEC +__spirv_ocl_frexp(__CLC_GENTYPE_VEC x, __CLC_ADDRESS_SPACE int2 *ep) { + int ep_x; + int ep_y; + __CLC_GENTYPE_VEC res = (__CLC_GENTYPE_VEC)(__spirv_ocl_frexp(x.x, &ep_x), + __spirv_ocl_frexp(x.y, &ep_y)); + *ep = (int2)(ep_x, ep_y); + return res; +} + +#undef __CLC_GENTYPE_VEC +#define __CLC_GENTYPE_VEC __CLC_XCONCAT(__CLC_GENTYPE, 3) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE_VEC +__spirv_ocl_frexp(__CLC_GENTYPE_VEC x, __CLC_ADDRESS_SPACE int3 *ep) { + int ep_x; + int ep_y; + int ep_z; + __CLC_GENTYPE_VEC res = (__CLC_GENTYPE_VEC)(__spirv_ocl_frexp(x.x, &ep_x), + __spirv_ocl_frexp(x.y, &ep_y), + __spirv_ocl_frexp(x.z, &ep_z)); + *ep = (int3)(ep_x, ep_y, ep_z); + return res; +} + +#define __CLC_VECTORIZE_FREXP_IMPL(GENTYPE, VEC_LEN, HALF_VEC_LEN) \ + _CLC_OVERLOAD _CLC_DEF GENTYPE __spirv_ocl_frexp( \ + GENTYPE x, __CLC_ADDRESS_SPACE int##VEC_LEN *ep) { \ + int##HALF_VEC_LEN ep_lo; \ + int##HALF_VEC_LEN ep_hi; \ + GENTYPE res = (GENTYPE)(__spirv_ocl_frexp(x.lo, &ep_lo), \ + __spirv_ocl_frexp(x.hi, &ep_hi)); \ + *ep = (int##VEC_LEN)(ep_lo, ep_hi); \ + return res; \ + } + +#define __CLC_VECTORIZE_FREXP(VEC_LEN, HALF_VEC_LEN) \ + __CLC_VECTORIZE_FREXP_IMPL(__CLC_XCONCAT(__CLC_GENTYPE, VEC_LEN), VEC_LEN, \ + HALF_VEC_LEN) + +__CLC_VECTORIZE_FREXP(4, 2) +__CLC_VECTORIZE_FREXP(8, 4) +__CLC_VECTORIZE_FREXP(16, 8) + +#undef __CLC_GENTYPE_VEC +#undef __CLC_VECTORIZE_FREXP diff --git a/libclc/amdgcn-amdhsa/libspirv/math/hypot.cl b/libclc/amdgcn-amdhsa/libspirv/math/hypot.cl new file mode 100644 index 0000000000000..53701fd7ca264 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/hypot.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_hypot_f64(double,double); +float __ocml_hypot_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_hypot +#define __CLC_BUILTIN __ocml_hypot +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl b/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl new file mode 100644 index 0000000000000..2365ac6cf600b --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +double __ocml_ldexp_f64(double, int); +float __ocml_ldexp_f32(float, int); + +#define __CLC_FUNCTION __spirv_ocl_ldexp +#define __CLC_BUILTIN __ocml_ldexp +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) + +_CLC_DEFINE_BINARY_BUILTIN(float, __spirv_ocl_ldexp, __ocml_ldexp_f32, float, int) +_CLC_DEFINE_BINARY_BUILTIN(float, __spirv_ocl_ldexp, __ocml_ldexp_f32, float, uint) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_DEFINE_BINARY_BUILTIN(double, __spirv_ocl_ldexp, __ocml_ldexp_f64, double, int) +_CLC_DEFINE_BINARY_BUILTIN(double, __spirv_ocl_ldexp, __ocml_ldexp_f64, double, uint) +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_ldexp, __ocml_ldexp_f32, half, int) +_CLC_DEFINE_BINARY_BUILTIN(half, __spirv_ocl_ldexp, __ocml_ldexp_f32, half, uint) +#endif diff --git a/libclc/amdgcn-amdhsa/libspirv/math/lgamma.cl b/libclc/amdgcn-amdhsa/libspirv/math/lgamma.cl new file mode 100644 index 0000000000000..003d7df0a11f2 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/lgamma.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_lgamma_f64(double); +float __ocml_lgamma_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_lgamma +#define __CLC_BUILTIN __ocml_lgamma +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/log.cl b/libclc/amdgcn-amdhsa/libspirv/math/log.cl new file mode 100644 index 0000000000000..b2a7709bf8c35 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/log.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_log_f64(double); +float __ocml_log_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_log +#define __CLC_BUILTIN __ocml_log +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/log10.cl b/libclc/amdgcn-amdhsa/libspirv/math/log10.cl new file mode 100644 index 0000000000000..f3fd6467062c5 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/log10.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_log10_f64(double); +float __ocml_log10_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_log10 +#define __CLC_BUILTIN __ocml_log10 +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/log1p.cl b/libclc/amdgcn-amdhsa/libspirv/math/log1p.cl new file mode 100644 index 0000000000000..b42d1d93bca8b --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/log1p.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_log1p_f64(double); +float __ocml_log1p_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_log1p +#define __CLC_BUILTIN __ocml_log1p +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/logb.cl b/libclc/amdgcn-amdhsa/libspirv/math/logb.cl new file mode 100644 index 0000000000000..32653df78c8a5 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/logb.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_logb_f64(double); +float __ocml_logb_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_logb +#define __CLC_BUILTIN __ocml_logb +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/modf.cl b/libclc/amdgcn-amdhsa/libspirv/math/modf.cl new file mode 100644 index 0000000000000..daaf816418179 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/modf.cl @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_modf_f64(double, double *); +float __ocml_modf_f32(float, float *); + +#define __CLC_MODF_IMPL(ADDRSPACE, BUILTIN, FP_TYPE, ARG_TYPE) \ + _CLC_OVERLOAD _CLC_DEF ARG_TYPE __spirv_ocl_modf(ARG_TYPE x, \ + ADDRSPACE ARG_TYPE *iptr) { \ + FP_TYPE stack_iptr; \ + ARG_TYPE ret = BUILTIN(x, &stack_iptr); \ + *iptr = stack_iptr; \ + return ret; \ + } + +#define __CLC_MODF(BUILTIN, FP_TYPE, ARG_TYPE) \ + __CLC_MODF_IMPL(private, BUILTIN, FP_TYPE, ARG_TYPE) \ + __CLC_MODF_IMPL(local, BUILTIN, FP_TYPE, ARG_TYPE) \ + __CLC_MODF_IMPL(global, BUILTIN, FP_TYPE, ARG_TYPE) + +__CLC_MODF(__ocml_modf_f32, float, float) + + +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_modf, float, + private, float) +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_modf, float, + local, float) +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_modf, float, + global, float) + +#ifdef cl_khr_fp64 +__CLC_MODF(__ocml_modf_f64, double, double) + +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_modf, double, + private, double) +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_modf, double, + local, double) +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_modf, double, + global, double) +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +__CLC_MODF(__ocml_modf_f32, float, half) + +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_modf, half, + private, half) +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_modf, half, + local, half) +_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_modf, half, + global, half) +#endif diff --git a/libclc/amdgcn-amdhsa/libspirv/math/nextafter.cl b/libclc/amdgcn-amdhsa/libspirv/math/nextafter.cl new file mode 100644 index 0000000000000..50a659d0d3035 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/nextafter.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_nextafter_f64(double,double); +float __ocml_nextafter_f32(float,float); + +#define __CLC_FUNCTION __spirv_ocl_nextafter +#define __CLC_BUILTIN __ocml_nextafter +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/pow.cl b/libclc/amdgcn-amdhsa/libspirv/math/pow.cl new file mode 100644 index 0000000000000..e5db8a67cc416 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/pow.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_pow_f64(double, double); +float __ocml_pow_f32(float, float); + +#define __CLC_FUNCTION __spirv_ocl_pow +#define __CLC_BUILTIN __ocml_pow +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/round.cl b/libclc/amdgcn-amdhsa/libspirv/math/round.cl new file mode 100644 index 0000000000000..77ef5c8c170a5 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/round.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_round_f64(double); +float __ocml_round_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_round +#define __CLC_BUILTIN __ocml_round +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/rsqrt.cl b/libclc/amdgcn-amdhsa/libspirv/math/rsqrt.cl new file mode 100644 index 0000000000000..3d8189e98231f --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/rsqrt.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_rsqrt_f64(double); +float __ocml_rsqrt_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_rsqrt +#define __CLC_BUILTIN __ocml_rsqrt +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/sin.cl b/libclc/amdgcn-amdhsa/libspirv/math/sin.cl index f976818f14caf..79e1dc75ab2e0 100644 --- a/libclc/amdgcn-amdhsa/libspirv/math/sin.cl +++ b/libclc/amdgcn-amdhsa/libspirv/math/sin.cl @@ -6,9 +6,14 @@ // //===----------------------------------------------------------------------===// +#include #include -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t -__spirv_ocl_sin(__clc_fp32_t In) { - return __builtin_amdgcn_sinf(In); -} +double __ocml_sin_f64(double); +float __ocml_sin_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_sin +#define __CLC_BUILTIN __ocml_sin +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/sincos.cl b/libclc/amdgcn-amdhsa/libspirv/math/sincos.cl new file mode 100644 index 0000000000000..415f18eec4975 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/sincos.cl @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + #include + #include + + void __ocml_sincos_f64(double, double *, double *); + void __ocml_sincos_f32(float, float *, float * ); + + #define __CLC_SINCOS_IMPL(ADDRSPACE, BUILTIN, FP_TYPE, ARG_TYPE) \ + _CLC_OVERLOAD _CLC_DEF ARG_TYPE __spirv_ocl_sincos( \ + ARG_TYPE x, ADDRSPACE ARG_TYPE *cosval_ptr) { \ + FP_TYPE sinval; \ + FP_TYPE cosval; \ + BUILTIN(x, &sinval, &cosval); \ + *cosval_ptr = cosval; \ + return sinval; \ + } + + #define __CLC_SINCOS(BUILTIN, FP_TYPE, ARG_TYPE) \ + __CLC_SINCOS_IMPL(global, BUILTIN, FP_TYPE, ARG_TYPE) \ + __CLC_SINCOS_IMPL(local, BUILTIN, FP_TYPE, ARG_TYPE) \ + __CLC_SINCOS_IMPL(private, BUILTIN, FP_TYPE, ARG_TYPE) + + __CLC_SINCOS(__ocml_sincos_f32, float, float) + + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_sincos, float, + private, float) + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_sincos, float, + local, float) + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_sincos, float, + global, float) + + #ifdef cl_khr_fp64 + __CLC_SINCOS(__ocml_sincos_f64, double, double) + + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_sincos, + double, private, double) + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_sincos, + double, local, double) + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __spirv_ocl_sincos, + double, global, double) + #endif + + #ifdef cl_khr_fp16 + #pragma OPENCL EXTENSION cl_khr_fp16 : enable + __CLC_SINCOS(__ocml_sincos_f32, float, half) + + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_sincos, half, + private, half) + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_sincos, half, + local, half) + _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_sincos, half, + global, half) + #endif diff --git a/libclc/amdgcn-amdhsa/libspirv/math/sinpi.cl b/libclc/amdgcn-amdhsa/libspirv/math/sinpi.cl new file mode 100644 index 0000000000000..918b43acb2c0f --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/sinpi.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_sinpi_f64(double); +float __ocml_sinpi_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_sinpi +#define __CLC_BUILTIN __ocml_sinpi +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/sqrt.cl b/libclc/amdgcn-amdhsa/libspirv/math/sqrt.cl index d499e1de2a90a..8a91b898e360a 100644 --- a/libclc/amdgcn-amdhsa/libspirv/math/sqrt.cl +++ b/libclc/amdgcn-amdhsa/libspirv/math/sqrt.cl @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// +#include #include -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t -__spirv_ocl_sqrt(__clc_fp32_t In) { - return __builtin_amdgcn_sqrtf(In); -} +double __ocml_sqrt_f64(double); +float __ocml_sqrt_f32(float); -_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp64_t -__spirv_ocl_sqrt(__clc_fp64_t In) { - return __builtin_amdgcn_sqrt(In); -} +#define __CLC_FUNCTION __spirv_ocl_sqrt +#define __CLC_BUILTIN __ocml_sqrt +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/tan.cl b/libclc/amdgcn-amdhsa/libspirv/math/tan.cl new file mode 100644 index 0000000000000..7324b43dff33b --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/tan.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_tan_f64(double); +float __ocml_tan_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_tan +#define __CLC_BUILTIN __ocml_tan +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/tanh.cl b/libclc/amdgcn-amdhsa/libspirv/math/tanh.cl new file mode 100644 index 0000000000000..f9a9d87623cb3 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/tanh.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_tanh_f64(double); +float __ocml_tanh_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_tanh +#define __CLC_BUILTIN __ocml_tanh +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include diff --git a/libclc/amdgcn-amdhsa/libspirv/math/trunc.cl b/libclc/amdgcn-amdhsa/libspirv/math/trunc.cl new file mode 100644 index 0000000000000..06740ffaef28c --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/trunc.cl @@ -0,0 +1,19 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_trunc_f64(double); +float __ocml_trunc_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_trunc +#define __CLC_BUILTIN __ocml_trunc +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include From f0747747ba26286472e8b7d513c2e2541ab19f66 Mon Sep 17 00:00:00 2001 From: GYDmedwin Date: Sat, 20 Nov 2021 02:40:12 +0800 Subject: [PATCH 02/14] [libclc] Delete the wrong file name in the SOURCE file, and add a new implementation file. (#4997) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unimplemented file name in the SOURCE file: ‘ilogb’ and 'fma'. At the same time, I added the sinh.cl file to ensure correct compilation.In addition, I deleted the redundant code in ‘ldexp’. --- libclc/amdgcn-amdhsa/libspirv/SOURCES | 2 -- libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl | 5 ----- libclc/amdgcn-amdhsa/libspirv/math/sinh.cl | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 libclc/amdgcn-amdhsa/libspirv/math/sinh.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/SOURCES b/libclc/amdgcn-amdhsa/libspirv/SOURCES index 2a0650facae28..3e3a14f221a01 100644 --- a/libclc/amdgcn-amdhsa/libspirv/SOURCES +++ b/libclc/amdgcn-amdhsa/libspirv/SOURCES @@ -23,13 +23,11 @@ math/expm1.cl math/fabs.cl math/fdim.cl math/floor.cl -math/fma.cl math/fmax.cl math/fmin.cl math/fmod.cl math/frexp.cl math/hypot.cl -math/ilogb.cl math/ldexp.cl math/lgamma.cl math/log.cl diff --git a/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl b/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl index 2365ac6cf600b..de5942b17666d 100644 --- a/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl +++ b/libclc/amdgcn-amdhsa/libspirv/math/ldexp.cl @@ -14,11 +14,6 @@ double __ocml_ldexp_f64(double, int); float __ocml_ldexp_f32(float, int); -#define __CLC_FUNCTION __spirv_ocl_ldexp -#define __CLC_BUILTIN __ocml_ldexp -#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) -#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) - _CLC_DEFINE_BINARY_BUILTIN(float, __spirv_ocl_ldexp, __ocml_ldexp_f32, float, int) _CLC_DEFINE_BINARY_BUILTIN(float, __spirv_ocl_ldexp, __ocml_ldexp_f32, float, uint) diff --git a/libclc/amdgcn-amdhsa/libspirv/math/sinh.cl b/libclc/amdgcn-amdhsa/libspirv/math/sinh.cl new file mode 100644 index 0000000000000..590afe60a8dc7 --- /dev/null +++ b/libclc/amdgcn-amdhsa/libspirv/math/sinh.cl @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +double __ocml_sinh_f64(double); +float __ocml_sinh_f32(float); + +#define __CLC_FUNCTION __spirv_ocl_sinh +#define __CLC_BUILTIN __ocml_sinh +#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, _f32) +#define __CLC_BUILTIN_D __CLC_XCONCAT(__CLC_BUILTIN, _f64) +#include + From 2af6ccdb9ad32bc0a608e78ad46301f2f53e88e6 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Sat, 20 Nov 2021 08:34:22 +0100 Subject: [PATCH 03/14] [SYCL] Fix memory leak in online compiler (#4963) The experimental online compiler may leak memory in compileToSPIRV. These changes address this leak by storing the SPIR-V binary information directly in the vector that will later be returned. Signed-off-by: Steffen Larsen --- .../detail/online_compiler/online_compiler.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/online_compiler/online_compiler.cpp b/sycl/source/detail/online_compiler/online_compiler.cpp index 96a1641750bc3..78f2c12098d19 100644 --- a/sycl/source/detail/online_compiler/online_compiler.cpp +++ b/sycl/source/detail/online_compiler/online_compiler.cpp @@ -160,16 +160,14 @@ compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceType, &SourceName, 0, nullptr, nullptr, nullptr, &NumOutputs, &Outputs, &OutputLengths, &OutputNames); - byte *SpirV = nullptr; + std::vector SpirV; std::string CompileLog; - size_t SpirVSize = 0; for (uint32_t I = 0; I < NumOutputs; I++) { size_t NameLen = strlen(OutputNames[I]); if (NameLen >= 4 && strstr(OutputNames[I], ".spv") != nullptr && Outputs[I] != nullptr) { - SpirVSize = OutputLengths[I]; - SpirV = new byte[SpirVSize]; - std::memcpy(SpirV, Outputs[I], SpirVSize); + assert(SpirV.size() == 0 && "More than one SPIR-V output found."); + SpirV = std::vector(Outputs[I], Outputs[I] + OutputLengths[I]); } else if (!strcmp(OutputNames[I], "stdout.log")) { CompileLog = std::string(reinterpret_cast(Outputs[I])); } @@ -184,13 +182,13 @@ compileToSPIRV(const std::string &Source, sycl::info::device_type DeviceType, if (CompileError) throw online_compile_error("ocloc reported compilation errors: {\n" + CompileLog + "\n}"); - if (!SpirV) + if (SpirV.empty()) throw online_compile_error( "Unexpected output: ocloc did not return SPIR-V"); if (MemFreeError) throw online_compile_error("ocloc cannot safely free resources"); - return std::vector(SpirV, SpirV + SpirVSize); + return SpirV; } } // namespace detail From 653bae908a103712372ff09e02334afd659273d9 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Mon, 22 Nov 2021 12:14:02 +0300 Subject: [PATCH 04/14] [SYCL] Diagnose non-forward declarable kernel name types (#4945) Add additional error emission in host mode because only on host with help of integration header possible to differentiate case like this: ``` int main() { parallel_for(..); } ``` which uses technically non-forward declarable kernel name type but still allowed by SYCL spec, from case like this: ``` int main() { class KernelName; parallel_for(..); } ``` which should be diagnosed, since the errors are emitted in runtime. Everything works when `KernelName` typename is used directly in `parallel_for`, because in this case when the type `KernelName` is forward declared by integration header, host uses `::KernelName` typename to access kernel info. However when `KernelName` is forward declared in non-global/namespace scope it actually produces a separate delclaration `main::KernelName` which is not visible for runtime code that submits kernels. --- clang/lib/Sema/SemaSYCL.cpp | 38 ++++++++++---- clang/test/CodeGenSYCL/loop_fusion_host.cpp | 8 +++ clang/test/CodeGenSYCL/stall_enable_host.cpp | 8 +++ .../CL/sycl/detail/defines_elementary.hpp | 8 +++ .../Inputs/CL/sycl/detail/kernel_desc.hpp | 51 +++++++++++++++++++ clang/test/SemaSYCL/Inputs/sycl.hpp | 37 +++++++------- .../non-fwd-declarable-kernel-name.cpp | 48 +++++++++++++++++ sycl/test/warnings/sycl_2020_deprecations.cpp | 8 +++ 8 files changed, 179 insertions(+), 27 deletions(-) create mode 100644 clang/test/SemaSYCL/Inputs/CL/sycl/detail/defines_elementary.hpp create mode 100644 clang/test/SemaSYCL/Inputs/CL/sycl/detail/kernel_desc.hpp create mode 100644 clang/test/SemaSYCL/non-fwd-declarable-kernel-name.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 0aa03d17a80ae..7445ccde6a2e1 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -3509,9 +3509,26 @@ class SYCLKernelNameTypeVisitor IsInvalid = true; return; } - // Check if the declaration is completely defined within a - // function or class/struct. - if (Tag->isCompleteDefinition()) { + + // Diagnose used types without complete definition i.e. + // int main() { + // class KernelName1; + // parallel_for(..); + // } + // This case can only be diagnosed during host compilation because the + // integration header is required to distinguish between the invalid + // code (above) and the following valid code: + // int main() { + // parallel_for(..); + // } + // The device compiler forward declares both KernelName1 and + // KernelName2 in the integration header as ::KernelName1 and + // ::KernelName2. The problem with the former case is the additional + // declaration 'class KernelName1' in non-global scope. Lookup in this + // case will resolve to ::main::KernelName1 (instead of + // ::KernelName1). Since this is not visible to runtime code that + // submits kernels, this is invalid. + if (Tag->isCompleteDefinition() || S.getLangOpts().SYCLIsHost) { S.Diag(KernelInvocationFuncLoc, diag::err_sycl_kernel_incorrectly_named) << /* kernel name should be forward declarable at namespace @@ -3561,14 +3578,20 @@ class SYCLKernelNameTypeVisitor void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc, ArrayRef Args) { + QualType KernelNameType = + calculateKernelNameType(getASTContext(), KernelFunc); + SYCLKernelNameTypeVisitor KernelNameTypeVisitor( + *this, Args[0]->getExprLoc(), KernelNameType, + IsSYCLUnnamedKernel(*this, KernelFunc)); + KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); + // FIXME: In place until the library works around its 'host' invocation // issues. if (!LangOpts.SYCLIsDevice) return; + const CXXRecordDecl *KernelObj = GetSYCLKernelObjectType(KernelFunc)->getAsCXXRecordDecl(); - QualType KernelNameType = - calculateKernelNameType(getASTContext(), KernelFunc); if (!KernelObj) { Diag(Args[0]->getExprLoc(), diag::err_sycl_kernel_not_function_object); @@ -3609,15 +3632,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc, IsSIMDKernel); KernelObjVisitor Visitor{*this}; - SYCLKernelNameTypeVisitor KernelNameTypeVisitor( - *this, Args[0]->getExprLoc(), KernelNameType, - IsSYCLUnnamedKernel(*this, KernelFunc)); DiagnosingSYCLKernel = true; // Emit diagnostics for SYCL device kernels only - if (LangOpts.SYCLIsDevice) - KernelNameTypeVisitor.Visit(KernelNameType.getCanonicalType()); Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker, DecompMarker); Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker, DecompMarker); diff --git a/clang/test/CodeGenSYCL/loop_fusion_host.cpp b/clang/test/CodeGenSYCL/loop_fusion_host.cpp index 0cddfd6d6ed1d..3b9fd8be45535 100644 --- a/clang/test/CodeGenSYCL/loop_fusion_host.cpp +++ b/clang/test/CodeGenSYCL/loop_fusion_host.cpp @@ -5,6 +5,14 @@ __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { kernelFunc(); } +// This test uses SYCL host only mode without integration header, so +// forward declare used kernel name class, otherwise it will be diagnosed by +// the diagnostic implemented in https://github.com/intel/llvm/pull/4945. +// The error happens because in host mode it is assumed that all kernel names +// are forward declared at global or namespace scope because of integration +// header. +class kernel_name_1; + template class KernelFunctor5 { public: diff --git a/clang/test/CodeGenSYCL/stall_enable_host.cpp b/clang/test/CodeGenSYCL/stall_enable_host.cpp index 4f99f0dfa2bae..b85846232286f 100644 --- a/clang/test/CodeGenSYCL/stall_enable_host.cpp +++ b/clang/test/CodeGenSYCL/stall_enable_host.cpp @@ -2,6 +2,14 @@ // Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Host (no-op in IR-CodeGen for host-mode). +// This test uses SYCL host only mode without integration header, so +// forward declare used kernel name class, otherwise it will be diagnosed by +// the diagnostic implemented in https://github.com/intel/llvm/pull/4945. +// The error happens because in host mode it is assumed that all kernel names +// are forward declared at global or namespace scope because of integration +// header. +class kernel_name_1; + [[intel::use_stall_enable_clusters]] void test() {} void test1() { diff --git a/clang/test/SemaSYCL/Inputs/CL/sycl/detail/defines_elementary.hpp b/clang/test/SemaSYCL/Inputs/CL/sycl/detail/defines_elementary.hpp new file mode 100644 index 0000000000000..00322e60ffc51 --- /dev/null +++ b/clang/test/SemaSYCL/Inputs/CL/sycl/detail/defines_elementary.hpp @@ -0,0 +1,8 @@ +#pragma once + +#ifndef __SYCL_DISABLE_NAMESPACE_INLINE__ +#define __SYCL_INLINE_NAMESPACE(X) inline namespace X +#else +#define __SYCL_INLINE_NAMESPACE(X) namespace X +#endif // __SYCL_DISABLE_NAMESPACE_INLINE__ +#define __SYCL_DLL_LOCAL diff --git a/clang/test/SemaSYCL/Inputs/CL/sycl/detail/kernel_desc.hpp b/clang/test/SemaSYCL/Inputs/CL/sycl/detail/kernel_desc.hpp new file mode 100644 index 0000000000000..a33373f5ba987 --- /dev/null +++ b/clang/test/SemaSYCL/Inputs/CL/sycl/detail/kernel_desc.hpp @@ -0,0 +1,51 @@ +#pragma once + +#include + +__SYCL_INLINE_NAMESPACE(cl) { + namespace sycl { + namespace detail { + +#ifndef __SYCL_DEVICE_ONLY__ +#define _Bool bool +#endif + + // kernel parameter kinds + enum class kernel_param_kind_t { + kind_accessor = 0, + kind_std_layout = 1, // standard layout object parameters + kind_sampler = 2, + kind_pointer = 3, + kind_specialization_constants_buffer = 4, + kind_stream = 5, + kind_invalid = 0xf, // not a valid kernel kind + }; + + // describes a kernel parameter + struct kernel_param_desc_t { + // parameter kind + kernel_param_kind_t kind; + // kind == kind_std_layout + // parameter size in bytes (includes padding for structs) + // kind == kind_accessor + // access target; possible access targets are defined in access/access.hpp + int info; + // offset of the captured value of the parameter in the lambda or function + // object + int offset; + }; + + template struct KernelInfo { + static constexpr unsigned getNumParams() { return 0; } + static const kernel_param_desc_t &getParamDesc(int) { + static kernel_param_desc_t Dummy; + return Dummy; + } + static constexpr const char *getName() { return ""; } + static constexpr bool isESIMD() { return 0; } + static constexpr bool callsThisItem() { return false; } + static constexpr bool callsAnyThisFreeFunction() { return false; } + }; + } // namespace detail + } // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 9cb22bce7177b..fefb12939d01a 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -130,6 +130,8 @@ struct opencl_image_type; using type = __ocl_image##dim##d_##ifarray_##amsuffix##_t; \ }; +#ifdef __SYCL_DEVICE_ONLY__ + #define IMAGETY_READ_3_DIM_IMAGE \ IMAGETY_DEFINE(1, read, ro, image, ) \ IMAGETY_DEFINE(2, read, ro, image, ) \ @@ -154,6 +156,8 @@ IMAGETY_WRITE_3_DIM_IMAGE IMAGETY_READ_2_DIM_IARRAY IMAGETY_WRITE_2_DIM_IARRAY +#endif // __SYCL_DEVICE_ONLY__ + template struct _ImageImplT { #ifdef __SYCL_DEVICE_ONLY__ @@ -232,19 +236,35 @@ template struct get_kernel_wrapper_name_t { #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) template ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc) { // #KernelSingleTaskFunc +#ifdef __SYCL_DEVICE_ONLY__ kernelFunc(); // #KernelSingleTaskKernelFuncCall +#else + (void)kernelFunc; +#endif } template ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc, kernel_handler kh) { +#ifdef __SYCL_DEVICE_ONLY__ kernelFunc(kh); +#else + (void)kernelFunc; +#endif } template ATTR_SYCL_KERNEL void kernel_parallel_for(const KernelType &kernelFunc) { +#ifdef __SYCL_DEVICE_ONLY__ kernelFunc(); +#else + (void)kernelFunc; +#endif } template ATTR_SYCL_KERNEL void kernel_parallel_for_work_group(const KernelType &KernelFunc, kernel_handler kh) { +#ifdef __SYCL_DEVICE_ONLY__ KernelFunc(group<1>(), kh); +#else + (void)KernelFunc; +#endif } class handler { @@ -252,40 +272,23 @@ class handler { template void single_task(const KernelType &kernelFunc) { using NameT = typename get_kernel_name_t::name; -#ifdef __SYCL_DEVICE_ONLY__ kernel_single_task(kernelFunc); // #KernelSingleTask -#else - kernelFunc(); -#endif } template void single_task(const KernelType &kernelFunc, kernel_handler kh) { using NameT = typename get_kernel_name_t::name; -#ifdef __SYCL_DEVICE_ONLY__ kernel_single_task(kernelFunc, kh); -#else - kernelFunc(kh); -#endif } template void parallel_for(const KernelType &kernelObj) { using NameT = typename get_kernel_name_t::name; using NameWT = typename get_kernel_wrapper_name_t::name; -#ifdef __SYCL_DEVICE_ONLY__ kernel_parallel_for(kernelObj); -#else - kernelObj(); -#endif } template void parallel_for_work_group(const KernelType &kernelFunc, kernel_handler kh) { using NameT = typename get_kernel_name_t::name; -#ifdef __SYCL_DEVICE_ONLY__ kernel_parallel_for_work_group(kernelFunc, kh); -#else - group<1> G; - kernelFunc(G, kh); -#endif } }; diff --git a/clang/test/SemaSYCL/non-fwd-declarable-kernel-name.cpp b/clang/test/SemaSYCL/non-fwd-declarable-kernel-name.cpp new file mode 100644 index 0000000000000..39d6ed4452f22 --- /dev/null +++ b/clang/test/SemaSYCL/non-fwd-declarable-kernel-name.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fno-sycl-unnamed-lambda -fsyntax-only -sycl-std=2020 -fsycl-int-header=%t.h %s +// RUN: %clang_cc1 -fsycl-is-host -internal-isystem %S/Inputs -fno-sycl-unnamed-lambda -fsyntax-only -verify -include %t.h %s + +// This test verifies that incorrect kernel names are diagnosed correctly. + +#include "sycl.hpp" + +using namespace cl::sycl; + +// user-defined function +void function() { +} + +// user-defined struct +struct myWrapper { + class insideStruct; +}; + +template class RandomTemplate; + +int main() { + queue q; + + q.submit([&](handler &h) { + h.single_task([]() { function(); }); + }); + q.submit([&](handler &h) { + h.single_task>([]() { function(); }); + }); + + class NotOk; + // expected-error@#KernelSingleTask {{'NotOk' is invalid; kernel name should be forward declarable at namespace scope}} + // expected-note@+2 {{in instantiation of function template specialization}} + q.submit([&](handler &h) { + h.single_task([]() { function(); }); + }); + // expected-error@#KernelSingleTask {{'myWrapper::insideStruct' is invalid; kernel name should be forward declarable at namespace scope}} + // expected-note@+2 {{in instantiation of function template specialization}} + q.submit([&](handler &h) { + h.single_task([]() { function(); }); + }); + // expected-error@#KernelSingleTask {{'RandomTemplate' is invalid; kernel name should be forward declarable at namespace scope}} + // expected-note@+2 {{in instantiation of function template specialization}} + q.submit([&](handler &h) { + h.single_task>([]() { function(); }); + }); + return 0; +} diff --git a/sycl/test/warnings/sycl_2020_deprecations.cpp b/sycl/test/warnings/sycl_2020_deprecations.cpp index 59a2bf000ba6f..611749137cfaf 100644 --- a/sycl/test/warnings/sycl_2020_deprecations.cpp +++ b/sycl/test/warnings/sycl_2020_deprecations.cpp @@ -6,6 +6,14 @@ #include #include +// This test uses SYCL host only mode without integration header, so +// forward declare used kernel name class, otherwise it will be diagnosed by +// the diagnostic implemented in https://github.com/intel/llvm/pull/4945. +// The error happens because in host mode it is assumed that all kernel names +// are forward declared at global or namespace scope because of integration +// header. +class Test; + int main() { cl_context ClCtx; // expected-error@+1 {{no matching constructor for initialization of 'sycl::context'}} From 92bcb4194af0411dfecadc507fc61b9f3b451923 Mon Sep 17 00:00:00 2001 From: vladimirlaz Date: Mon, 22 Nov 2021 12:29:47 +0300 Subject: [PATCH 05/14] [SYCL] Use ICD registry keys instead of OCL_ICD_FILENAMES (#4984) Change OpenCL CPU and FPGA emulator runtimes configuration on Windows to use OpenCL ICD registry records instead of OCL_ICD_FILENAMES. That is done to use the latest OpenCL ICD loader which ignores OCL_ICD_FILENAMES configuration in the administrative console. Update documentation accordingly. --- sycl/doc/GetStartedGuide.md | 4 ++-- sycl/tools/install.bat | 41 ++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 40090c5d01c49..1daf6b09a1944 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -416,10 +416,10 @@ command: ```bash # Install OpenCL FPGA emulation RT - # Answer N to clean previous OCL_ICD_FILENAMES configuration + # Answer Y to clean previous OCL_ICD_FILENAMES configuration and ICD records cleanup c:\oclfpga_rt_\install.bat c:\oneapi-tbb-\redist\intel64\vc14 # Install OpenCL CPU RT - # Answer Y to setup CPU RT side-bi-side with FPGA RT + # Answer N for ICD records cleanup c:\oclcpu_rt_\install.bat c:\oneapi-tbb-\redist\intel64\vc14 ``` diff --git a/sycl/tools/install.bat b/sycl/tools/install.bat index a28a290cd48e8..1f37299b171a5 100755 --- a/sycl/tools/install.bat +++ b/sycl/tools/install.bat @@ -1,5 +1,5 @@ @echo off -setlocal EnableDelayedExpansion +setlocal EnableDelayedExpansion enableextensions set OCL_RT_DIR=%~dp0 echo ### @@ -12,28 +12,25 @@ IF NOT EXIST %OCL_RT_ENTRY_LIB% ( set OCL_RT_ENTRY_LIB=%OCL_RT_DIR%intelocl64_emu.dll ) -IF "%OCL_ICD_FILENAMES%" == "" ( - set EXTENDEXISTING=N -) else ( +IF NOT "%OCL_ICD_FILENAMES%" == "" ( echo OCL_ICD_FILENAMES is present and contains %OCL_ICD_FILENAMES% :USERINPUT - set /P "EXTENDEXISTING=Should the OpenCL RT extend existing configuration (Y/N): " -) -IF "%EXTENDEXISTING%" == "N" ( - echo Clean up previous configuration - set OCL_ICD_FILENAMES=%OCL_RT_ENTRY_LIB% -) else ( - IF "%EXTENDEXISTING%" == "Y" ( - - set OCL_ICD_FILENAMES=%OCL_ICD_FILENAMES%;%OCL_RT_ENTRY_LIB% - echo Extend previous configuration to %OCL_ICD_FILENAMES%;%OCL_RT_ENTRY_LIB% + set /P "CLEAREXISTING=Should the OCL_ICD_FILENAMES be removed (Y/N): " + IF "!CLEAREXISTING!" == "N" ( + echo Existing configuration is going to be preserved ) else ( - echo WARNING: Incorrect input %EXTENDEXISTING%. Only Y and N are allowed. - goto USERINPUT + IF "!CLEAREXISTING!" == "Y" ( + echo Clean up previous configuration + REG DELETE "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /f /v OCL_ICD_FILENAMES + echo Execute `set OCL_ICD_FILENAMES=` to remove variable from the current console + + ) else ( + echo WARNING: Incorrect input !CLEAREXISTING!. Only Y and N are allowed. + goto USERINPUT + ) ) ) - set SYSTEM_OCL_ICD_LOADER=C:\Windows\System32\OpenCL.dll set NEW_OCL_ICD_LOADER=%OCL_RT_DIR%\OpenCL.dll set INSTALL_ERRORS=0 @@ -103,11 +100,13 @@ IF %NEED_OPENCL_UPGRADE% == True ( echo. echo ### -echo ### 3. Set the environment variable OCL_ICD_FILENAMES to %OCL_ICD_FILENAMES% +echo ### 3. Configure ICD registry records echo ### -REG ADD "HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\Environment" /f /v OCL_ICD_FILENAMES /d "%OCL_ICD_FILENAMES%" +echo Deleting all obsolete registry keys +REG DELETE "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /va +REG ADD "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /f /v %OCL_RT_ENTRY_LIB% /t REG_DWORD /d "0" IF ERRORLEVEL 1 ( - echo !!! Cannot set the environment variable OCL_ICD_FILENAMES + echo !!! Cannot set ICD registry key set INSTALL_ERRORS=1 ) @@ -165,7 +164,7 @@ IF %INSTALL_ERRORS% == 1 ( echo See recommendations printed above and perform the following actions manually: echo 1. Save %SYSTEM_OCL_ICD_LOADER% to %SYSTEM_OCL_ICD_LOADER%.%SYSTEM_OPENCL_VER% echo 2. Copy %NEW_OCL_ICD_LOADER% to %SYSTEM_OCL_ICD_LOADER% - echo 3. Add/set the environment variable OCL_ICD_FILENAMES to %OCL_RT_ENTRY_LIB% + echo 3. Add/update registry string value in Computer\HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors\%OCL_RT_ENTRY_LIB% containing 0 echo 4. Copy TBB libraries or create symbolic links in %OCL_RT_DIR%tbb. echo 5. Add/set the environment variable PATH to %OCL_RT_DIR%tbb echo Or try running this batch file as Administrator. From b0f9a812c55cbd058253f8b6506d9116ef54c183 Mon Sep 17 00:00:00 2001 From: Pavel Samolysov Date: Mon, 22 Nov 2021 12:31:45 +0300 Subject: [PATCH 06/14] [SYCL] Add a nice error message to sycl::buffer to std::string (#4973) Since sycl::vec is not a trivially copyable class, even though we have defined a specialization for sycl::is_device_copyable template for the sycl::vec class itself, it doesn't make any classes that have members of the sycl::vec type device copyable since those classes aren't trivially copyable yet. For example, a class from SYCL-CTS: template struct image_accessor_failure_item { bool triggered; T value; T expected; image_accessor_failure_item() : triggered(false), value(0), expected(0) {} }; is not trivially copyable since it has two fields: value and expected of type sycl::vec; therefore, the class is not device copyable and a compilation error occurs when a sycl::buffer is created for this class instances. The regression makes us to revert the commit and delay adding the general check until the sycl::vec class is modified to be trivially copyable. Instead a particular check for the std::string class only was added. --- sycl/include/CL/sycl/buffer.hpp | 8 ++++---- sycl/include/CL/sycl/types.hpp | 7 ------- .../basic_tests/buffer/buffer_for_not_device_copyable.cpp | 5 +---- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index 73174e690f481..a293e796be53d 100755 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -14,7 +14,6 @@ #include #include #include -#include #include __SYCL_INLINE_NAMESPACE(cl) { @@ -45,9 +44,10 @@ template 0) && (dimensions <= 3)>> class buffer { - static_assert( - is_device_copyable::value, - "The underlying data type of a buffer 'T' must be device copyable"); + // TODO check is_device_copyable::value after converting sycl::vec into a + // trivially copyable class. + static_assert(!std::is_same::value, + "'std::string' is not a device copyable type"); public: using value_type = T; diff --git a/sycl/include/CL/sycl/types.hpp b/sycl/include/CL/sycl/types.hpp index 5bb77ec0b4372..d6dacff4dfa23 100644 --- a/sycl/include/CL/sycl/types.hpp +++ b/sycl/include/CL/sycl/types.hpp @@ -2423,13 +2423,6 @@ struct is_device_copyable< !std::is_trivially_copyable::value>> : std::true_type {}; -// vec is device copyable on host, on device vec is trivially copyable -// and therefore is device copyable too. -#ifndef __SYCL_DEVICE_ONLY__ -template -struct is_device_copyable> : std::true_type {}; -#endif - namespace detail { template struct IsDeprecatedDeviceCopyable : std::false_type {}; diff --git a/sycl/test/basic_tests/buffer/buffer_for_not_device_copyable.cpp b/sycl/test/basic_tests/buffer/buffer_for_not_device_copyable.cpp index 6490db0e8a73d..1a0969b4fb495 100644 --- a/sycl/test/basic_tests/buffer/buffer_for_not_device_copyable.cpp +++ b/sycl/test/basic_tests/buffer/buffer_for_not_device_copyable.cpp @@ -13,10 +13,7 @@ int main() { static_assert(!is_device_copyable_v); std::vector sv{"hello", "sycl", "world"}; buffer b2(sv.data(), range<1>(3)); - //expected-error@CL/sycl/buffer.hpp:* {{"The underlying data type of a buffer 'T' must be device copyable"}} + //expected-error@CL/sycl/buffer.hpp:* {{"'std::string' is not a device copyable type"}} - static_assert(is_device_copyable>::value); - vec iVec; - buffer b3(&iVec, range<1>(1)); return 0; } From c4fa95f6fa09cf36af834b48cae76be040805c08 Mon Sep 17 00:00:00 2001 From: Dmitri Mokhov Date: Mon, 22 Nov 2021 04:23:41 -0600 Subject: [PATCH 07/14] [SYCL] Add SYCL 2020 info::device::built_in_kernel_ids support (#4996) - Add `ProgramManager::getBuiltInKernelID`, which generates and caches built-in kernel IDs. - Use that API to generate or look up built-in kernel IDs, when queried. - Throw an exception in program manager when actually trying to use built-in kernels, since they are not yet fully supported. - Add SYCL 2020 deprecation warning for `built_in_kernels` (old query). --- sycl/include/CL/sycl/info/device_traits.def | 1 + sycl/include/CL/sycl/info/info_desc.hpp | 5 +++- sycl/source/detail/device_impl.hpp | 1 + sycl/source/detail/device_info.hpp | 26 +++++++++++++++++++ .../program_manager/program_manager.cpp | 24 +++++++++++++++++ .../program_manager/program_manager.hpp | 11 ++++++++ sycl/test/abi/sycl_symbols_linux.dump | 21 ++++++++------- sycl/test/abi/sycl_symbols_windows.dump | 25 +++++++++--------- sycl/test/warnings/sycl_2020_deprecations.cpp | 4 +++ 9 files changed, 95 insertions(+), 23 deletions(-) diff --git a/sycl/include/CL/sycl/info/device_traits.def b/sycl/include/CL/sycl/info/device_traits.def index 2b0f637b09032..7393a898636a9 100644 --- a/sycl/include/CL/sycl/info/device_traits.def +++ b/sycl/include/CL/sycl/info/device_traits.def @@ -58,6 +58,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, is_linker_available, bool) __SYCL_PARAM_TRAITS_SPEC(device, execution_capabilities, std::vector) __SYCL_PARAM_TRAITS_SPEC(device, queue_profiling, bool) +__SYCL_PARAM_TRAITS_SPEC(device, built_in_kernel_ids, std::vector) __SYCL_PARAM_TRAITS_SPEC(device, built_in_kernels, std::vector) __SYCL_PARAM_TRAITS_SPEC(device, platform, cl::sycl::platform) __SYCL_PARAM_TRAITS_SPEC(device, name, std::string) diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index 3d03c37ff0f71..f47ff2b53399a 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -20,6 +20,7 @@ class program; #endif class device; class platform; +class kernel_id; // TODO: stop using OpenCL directly, use PI. namespace info { @@ -109,7 +110,8 @@ enum class device : cl_device_info { is_linker_available = CL_DEVICE_LINKER_AVAILABLE, execution_capabilities = CL_DEVICE_EXECUTION_CAPABILITIES, queue_profiling = CL_DEVICE_QUEUE_PROPERTIES, - built_in_kernels = CL_DEVICE_BUILT_IN_KERNELS, + built_in_kernels __SYCL2020_DEPRECATED("use built_in_kernel_ids instead") = + CL_DEVICE_BUILT_IN_KERNELS, platform = CL_DEVICE_PLATFORM, name = CL_DEVICE_NAME, vendor = CL_DEVICE_VENDOR, @@ -136,6 +138,7 @@ enum class device : cl_device_info { sub_group_sizes = CL_DEVICE_SUB_GROUP_SIZES_INTEL, partition_type_property, kernel_kernel_pipe_support, + built_in_kernel_ids, // USM usm_device_allocations = PI_USM_DEVICE_SUPPORT, usm_host_allocations = PI_USM_HOST_SUPPORT, diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 5d4a7deb4a2af..01299271ec4ac 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 99fe35a842d2d..7e953ad72eabd 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -279,6 +280,25 @@ struct get_device_info, } }; +// Specialization for built in kernel identifiers +template <> +struct get_device_info, + info::device::built_in_kernel_ids> { + static std::vector get(RT::PiDevice dev, const plugin &Plugin) { + std::string result = + get_device_info::get( + dev, Plugin); + auto names = split_string(result, ';'); + + std::vector ids; + ids.reserve(names.size()); + for (const auto &name : names) { + ids.push_back(ProgramManager::getInstance().getBuiltInKernelID(name)); + } + return ids; + } +}; + // Specialization for built in kernels, splits the string returned by OpenCL template <> struct get_device_info, @@ -979,6 +999,12 @@ template <> inline bool get_device_info_host() { return true; } +template <> +inline std::vector +get_device_info_host() { + return {}; +} + template <> inline std::vector get_device_info_host() { diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a056c1ec16b87..c2d6cce38eb3e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1341,6 +1341,19 @@ std::vector ProgramManager::getAllSYCLKernelIDs() { return AllKernelIDs; } +kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) { + std::lock_guard BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex); + + auto KernelID = m_BuiltInKernelIDs.find(KernelName); + if (KernelID == m_BuiltInKernelIDs.end()) { + auto Impl = std::make_shared(KernelName); + auto CachedID = createSyclObjFromImpl(Impl); + KernelID = m_BuiltInKernelIDs.insert({KernelName, CachedID}).first; + } + + return KernelID->second; +} + std::vector ProgramManager::getSYCLDeviceImagesWithCompatibleState( const context &Ctx, const std::vector &Devs, @@ -1511,6 +1524,17 @@ std::vector ProgramManager::getSYCLDeviceImages( std::vector ProgramManager::getSYCLDeviceImages( const context &Ctx, const std::vector &Devs, const std::vector &KernelIDs, bundle_state TargetState) { + { + std::lock_guard BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex); + + for (const kernel_id &ID : KernelIDs) { + if (m_BuiltInKernelIDs.find(ID.get_name()) != m_BuiltInKernelIDs.end()) + throw sycl::exception(make_error_code(errc::kernel_argument), + "Attempting to use a built-in kernel. They are " + "not fully supported"); + } + } + // Collect device images with compatible state std::vector DeviceImages = getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index d50e30ec6c238..1d1442c337b09 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -178,6 +178,10 @@ class ProgramManager { // in SYCL device images. std::vector getAllSYCLKernelIDs(); + // The function returns the unique SYCL kernel identifier associated with a + // built-in kernel name. + kernel_id getBuiltInKernelID(const std::string &KernelName); + // The function returns a vector of SYCL device images that are compiled with // the required state and at least one device from the passed list of devices. std::vector @@ -327,6 +331,13 @@ class ProgramManager { /// Access must be guarded by the m_KernelIDsMutex mutex. std::unordered_set m_ExportedSymbols; + /// Maps names of built-in kernels to their unique kernel IDs. + /// Access must be guarded by the m_BuiltInKernelIDsMutex mutex. + std::unordered_map m_BuiltInKernelIDs; + + /// Protects built-in kernel ID cache. + std::mutex m_BuiltInKernelIDsMutex; + // Keeps track of pi_program to image correspondence. Needed for: // - knowing which specialization constants are used in the program and // injecting their current values before compiling the SPIR-V; the binary diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 28f2d9d3c785b..95e0d29106179 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4109,9 +4109,14 @@ _ZNK2cl4sycl6device3hasENS0_6aspectE _ZNK2cl4sycl6device6is_cpuEv _ZNK2cl4sycl6device6is_gpuEv _ZNK2cl4sycl6device7is_hostEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131072EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131073EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131074EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131075EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16648EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16649EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16650EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16651EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16784EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16785EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE16786EEENS3_12param_traitsIS4_XT_EE11return_typeEv @@ -4244,6 +4249,7 @@ _ZNK2cl4sycl6streamneERKS1_ _ZNK2cl4sycl7context11get_backendEv _ZNK2cl4sycl7context11get_devicesEv _ZNK2cl4sycl7context12get_platformEv +_ZNK2cl4sycl7context12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK2cl4sycl7context12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property5image12use_host_ptrEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property5image13context_boundEEET_v @@ -4256,6 +4262,7 @@ _ZNK2cl4sycl7context12get_propertyINS0_8property6noinitEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property7no_initEEET_v _ZNK2cl4sycl7context12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v +_ZNK2cl4sycl7context12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv _ZNK2cl4sycl7context12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property5image12use_host_ptrEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property5image13context_boundEEEbv @@ -4268,8 +4275,6 @@ _ZNK2cl4sycl7context12has_propertyINS0_8property6noinitEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property7no_initEEEbv _ZNK2cl4sycl7context12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv -_ZNK2cl4sycl7context12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v -_ZNK2cl4sycl7context12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv _ZNK2cl4sycl7context3getEv _ZNK2cl4sycl7context7is_hostEv _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT_EE11return_typeEv @@ -4288,6 +4293,7 @@ _ZNK2cl4sycl7program11get_backendEv _ZNK2cl4sycl7program11get_contextEv _ZNK2cl4sycl7program11get_devicesEv _ZNK2cl4sycl7program12get_binariesEv +_ZNK2cl4sycl7program12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK2cl4sycl7program12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property5image12use_host_ptrEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property5image13context_boundEEET_v @@ -4298,9 +4304,9 @@ _ZNK2cl4sycl7program12get_propertyINS0_8property6buffer13context_boundEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property6buffer9use_mutexEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property6noinitEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v -_ZNK2cl4sycl7program12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property7no_initEEET_v _ZNK2cl4sycl7program12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v +_ZNK2cl4sycl7program12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv _ZNK2cl4sycl7program12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property5image12use_host_ptrEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property5image13context_boundEEEbv @@ -4311,7 +4317,6 @@ _ZNK2cl4sycl7program12has_propertyINS0_8property6buffer13context_boundEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property6buffer9use_mutexEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property6noinitEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv -_ZNK2cl4sycl7program12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property7no_initEEEbv _ZNK2cl4sycl7program12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv _ZNK2cl4sycl7program16get_link_optionsB5cxx11Ev @@ -4324,6 +4329,7 @@ _ZNK2cl4sycl7program8get_infoILNS0_4info7programE4449EEENS3_12param_traitsIS4_XT _ZNK2cl4sycl7program8get_infoILNS0_4info7programE4451EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl7program9getNativeEv _ZNK2cl4sycl7program9get_stateEv +_ZNK2cl4sycl7sampler12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property5image12use_host_ptrEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property5image13context_boundEEET_v @@ -4336,7 +4342,7 @@ _ZNK2cl4sycl7sampler12get_propertyINS0_8property6noinitEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property7context4cuda19use_primary_contextEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property7no_initEEET_v _ZNK2cl4sycl7sampler12get_propertyINS0_8property9reduction22initialize_to_identityEEET_v -_ZNK2cl4sycl7sampler12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v +_ZNK2cl4sycl7sampler12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_3ext6oneapi8property6buffer22use_pinned_host_memoryEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property5image12use_host_ptrEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property5image13context_boundEEEbv @@ -4349,7 +4355,6 @@ _ZNK2cl4sycl7sampler12has_propertyINS0_8property6noinitEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property7context4cuda19use_primary_contextEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property7no_initEEEbv _ZNK2cl4sycl7sampler12has_propertyINS0_8property9reduction22initialize_to_identityEEEbv -_ZNK2cl4sycl7sampler12has_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEEbv _ZNK2cl4sycl7sampler18get_filtering_modeEv _ZNK2cl4sycl7sampler19get_addressing_modeEv _ZNK2cl4sycl7sampler33get_coordinate_normalization_modeEv @@ -4377,7 +4382,3 @@ _ZNK2cl4sycl9exception8categoryEv _ZNK2cl4sycl9kernel_id8get_nameEv __sycl_register_lib __sycl_unregister_lib -_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131072EEENS3_12param_traitsIS4_XT_EE11return_typeEv -_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131075EEENS3_12param_traitsIS4_XT_EE11return_typeEv -_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131074EEENS3_12param_traitsIS4_XT_EE11return_typeEv -_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE131073EEENS3_12param_traitsIS4_XT_EE11return_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index c69c91305ce27..0df42b5fa4e8d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -132,11 +132,16 @@ ??$get_info@$0BBLK@@kernel@sycl@cl@@QEBAIAEBVdevice@12@@Z ??$get_info@$0BBNC@@event@sycl@cl@@QEBAIXZ ??$get_info@$0BBND@@event@sycl@cl@@QEBA?AW4event_command_status@info@12@XZ +??$get_info@$0CAAAA@@device@sycl@cl@@QEBA_KXZ +??$get_info@$0CAAAB@@device@sycl@cl@@QEBA?AV?$id@$00@12@XZ +??$get_info@$0CAAAC@@device@sycl@cl@@QEBA?AV?$id@$01@12@XZ +??$get_info@$0CAAAD@@device@sycl@cl@@QEBA?AV?$id@$02@12@XZ ??$get_info@$0CADD@@kernel@sycl@cl@@QEBAIAEBVdevice@12@V?$range@$02@12@@Z ??$get_info@$0EBAI@@device@sycl@cl@@QEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@XZ ??$get_info@$0EBAJ@@device@sycl@cl@@QEBA?AW4partition_property@info@12@XZ ??$get_info@$0EBAK@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0EBAK@@kernel@sycl@cl@@QEBAIAEBVdevice@12@@Z +??$get_info@$0EBAL@@device@sycl@cl@@QEBA?AV?$vector@Vkernel_id@sycl@cl@@V?$allocator@Vkernel_id@sycl@cl@@@std@@@std@@XZ ??$get_info@$0EBJA@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0EBJB@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0EBJC@@device@sycl@cl@@QEBA_NXZ @@ -185,6 +190,9 @@ ??$get_property@Vuse_pinned_host_memory@buffer@property@oneapi@ext@sycl@cl@@@context@sycl@cl@@QEBA?AVuse_pinned_host_memory@buffer@property@oneapi@ext@12@XZ ??$get_property@Vuse_pinned_host_memory@buffer@property@oneapi@ext@sycl@cl@@@program@sycl@cl@@QEBA?AVuse_pinned_host_memory@buffer@property@oneapi@ext@12@XZ ??$get_property@Vuse_pinned_host_memory@buffer@property@oneapi@ext@sycl@cl@@@sampler@sycl@cl@@QEBA?AVuse_pinned_host_memory@buffer@property@oneapi@ext@12@XZ +??$get_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@context@sycl@cl@@QEBA?AVuse_primary_context@0property@cuda@oneapi@ext@12@XZ +??$get_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@program@sycl@cl@@QEBA?AVuse_primary_context@context@property@cuda@oneapi@ext@12@XZ +??$get_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@sampler@sycl@cl@@QEBA?AVuse_primary_context@context@property@cuda@oneapi@ext@12@XZ ??$get_property@Vuse_primary_context@cuda@context@property@sycl@cl@@@context@sycl@cl@@QEBA?AVuse_primary_context@cuda@0property@12@XZ ??$get_property@Vuse_primary_context@cuda@context@property@sycl@cl@@@program@sycl@cl@@QEBA?AVuse_primary_context@cuda@context@property@12@XZ ??$get_property@Vuse_primary_context@cuda@context@property@sycl@cl@@@sampler@sycl@cl@@QEBA?AVuse_primary_context@cuda@context@property@12@XZ @@ -231,15 +239,12 @@ ??$has_property@Vuse_pinned_host_memory@buffer@property@oneapi@ext@sycl@cl@@@context@sycl@cl@@QEBA_NXZ ??$has_property@Vuse_pinned_host_memory@buffer@property@oneapi@ext@sycl@cl@@@program@sycl@cl@@QEBA_NXZ ??$has_property@Vuse_pinned_host_memory@buffer@property@oneapi@ext@sycl@cl@@@sampler@sycl@cl@@QEBA_NXZ +??$has_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@context@sycl@cl@@QEBA_NXZ +??$has_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@program@sycl@cl@@QEBA_NXZ +??$has_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@sampler@sycl@cl@@QEBA_NXZ ??$has_property@Vuse_primary_context@cuda@context@property@sycl@cl@@@context@sycl@cl@@QEBA_NXZ ??$has_property@Vuse_primary_context@cuda@context@property@sycl@cl@@@program@sycl@cl@@QEBA_NXZ ??$has_property@Vuse_primary_context@cuda@context@property@sycl@cl@@@sampler@sycl@cl@@QEBA_NXZ -??$has_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@context@sycl@cl@@QEBA_NXZ -??$get_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@program@sycl@cl@@QEBA?AVuse_primary_context@context@property@cuda@oneapi@ext@12@XZ -??$get_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@sampler@sycl@cl@@QEBA?AVuse_primary_context@context@property@cuda@oneapi@ext@12@XZ -??$has_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@sampler@sycl@cl@@QEBA_NXZ -??$get_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@context@sycl@cl@@QEBA?AVuse_primary_context@0property@cuda@oneapi@ext@12@XZ -??$has_property@Vuse_primary_context@context@property@cuda@oneapi@ext@sycl@cl@@@program@sycl@cl@@QEBA_NXZ ??0?$image_impl@$00@detail@sycl@cl@@QEAA@AEAV?$shared_ptr@X@std@@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$00@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@sycl@cl@@U?$default_delete@VSYCLMemObjAllocator@detail@sycl@cl@@@std@@@5@AEBVproperty_list@23@@Z ??0?$image_impl@$00@detail@sycl@cl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@sycl@cl@@U?$default_delete@VSYCLMemObjAllocator@detail@sycl@cl@@@std@@@std@@@Z ??0?$image_impl@$00@detail@sycl@cl@@QEAA@PEAXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$00@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@sycl@cl@@U?$default_delete@VSYCLMemObjAllocator@detail@sycl@cl@@@std@@@std@@AEBVproperty_list@23@@Z @@ -2042,7 +2047,6 @@ ?get@platform@sycl@cl@@QEBAPEAU_cl_platform_id@@XZ ?get@program@sycl@cl@@QEBAPEAU_cl_program@@XZ ?get@queue@sycl@cl@@QEBAPEAU_cl_command_queue@@XZ -?get_backend@kernel@sycl@cl@@QEBA?AW4backend@23@XZ ?getAssertHappenedBuffer@queue@sycl@cl@@AEAAAEAV?$buffer@UAssertHappened@detail@sycl@cl@@$00V?$aligned_allocator@D@234@X@23@XZ ?getBorderColor@detail@sycl@cl@@YA?AV?$vec@M$03@23@W4image_channel_order@23@@Z ?getBufSizeForContext@SYCLMemObjT@detail@sycl@cl@@SA_KAEBV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@PEAU_cl_mem@@@Z @@ -2122,6 +2126,7 @@ ?get_backend@device@sycl@cl@@QEBA?AW4backend@23@XZ ?get_backend@event@sycl@cl@@QEBA?AW4backend@23@XZ ?get_backend@interop_handle@sycl@cl@@QEBA?AW4backend@23@XZ +?get_backend@kernel@sycl@cl@@QEBA?AW4backend@23@XZ ?get_backend@kernel_bundle_plain@detail@sycl@cl@@QEBA?AW4backend@34@XZ ?get_backend@platform@sycl@cl@@QEBA?AW4backend@23@XZ ?get_backend@program@sycl@cl@@QEBA?AW4backend@23@XZ @@ -2583,8 +2588,8 @@ ?make_event@detail@sycl@cl@@YA?AVevent@23@_KAEBVcontext@23@_NW4backend@23@@Z ?make_event@level_zero@oneapi@ext@sycl@cl@@YA?AVevent@45@AEBVcontext@45@_K_N@Z ?make_event@level_zero@sycl@cl@@YA?AVevent@23@AEBVcontext@23@_K_N@Z -?make_kernel@detail@sycl@cl@@YA?AVkernel@23@_KAEBVcontext@23@W4backend@23@@Z ?make_kernel@detail@sycl@cl@@YA?AVkernel@23@AEBVcontext@23@AEBV?$kernel_bundle@$01@23@_K_NW4backend@23@@Z +?make_kernel@detail@sycl@cl@@YA?AVkernel@23@_KAEBVcontext@23@W4backend@23@@Z ?make_kernel_bundle@detail@sycl@cl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@_KAEBVcontext@23@W4bundle_state@23@W4backend@23@@Z ?make_kernel_bundle@detail@sycl@cl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@sycl@cl@@@std@@_KAEBVcontext@23@_NW4bundle_state@23@W4backend@23@@Z ?make_platform@detail@sycl@cl@@YA?AVplatform@23@_KW4backend@23@@Z @@ -4591,9 +4596,5 @@ ?what@exception@sycl@cl@@UEBAPEBDXZ ?wrapIntoImageBuffer@MemoryManager@detail@sycl@cl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@PEAXPEAVSYCLMemObjI@234@@Z DllMain -??$get_info@$0CAAAB@@device@sycl@cl@@QEBA?AV?$id@$00@12@XZ -??$get_info@$0CAAAA@@device@sycl@cl@@QEBA_KXZ -??$get_info@$0CAAAD@@device@sycl@cl@@QEBA?AV?$id@$02@12@XZ -??$get_info@$0CAAAC@@device@sycl@cl@@QEBA?AV?$id@$01@12@XZ __sycl_register_lib __sycl_unregister_lib diff --git a/sycl/test/warnings/sycl_2020_deprecations.cpp b/sycl/test/warnings/sycl_2020_deprecations.cpp index 611749137cfaf..b12aeb66d8e4a 100644 --- a/sycl/test/warnings/sycl_2020_deprecations.cpp +++ b/sycl/test/warnings/sycl_2020_deprecations.cpp @@ -142,6 +142,10 @@ int main() { auto MCA = sycl::info::device::max_constant_args; (void)MCA; + // expected-warning@+1{{'built_in_kernels' is deprecated: use built_in_kernel_ids instead}} + auto BIK = sycl::info::device::built_in_kernels; + (void)BIK; + // expected-warning@+1{{'extensions' is deprecated: platform::extensions is deprecated, use device::get_info() with info::device::aspects instead.}} auto PE = sycl::info::platform::extensions; From 9eb5c991f9349f2f396346924aa3ba3988ff5976 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Mon, 22 Nov 2021 14:56:57 +0100 Subject: [PATCH 08/14] [LIBCLC] Make sure flags are unset when building libclc for AMD (#4991) Without this patch the flags always contain `-mcpu=tahiti`. The problem occurs when llvm compiles the spirv module it chooses `llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS` as the subtarget. The instruction encoding differers, and when targeting `llvm::AMDGPUSubtarget::GFX9` the final binary is invalid. This in turns causes the `HSA_STATUS_ERROR_ILLEGAL_INSTRUCTION`. --- libclc/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 9bcb9735cb91e..fad7f0fec4ce9 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -322,6 +322,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) endforeach() endforeach() + # Please NOTE that variables in the foreach are not local and thus need + # to be reset every iteration. foreach( d ${${t}_devices} ) # Some targets don't have a specific GPU to target if( ${d} STREQUAL "none" OR ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) @@ -330,6 +332,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Disables NVVM reflection to defer to after linking set( flags "SHELL:-Xclang -target-feature" "SHELL:-Xclang +ptx72" "SHELL:-march=sm_86" "SHELL:-mllvm --nvvm-reflect-enable=false") + else() + set ( flags ) endif() set( arch_suffix "${t}" ) else() From 5f562beca388af415f7196c22b21827b4d4cf812 Mon Sep 17 00:00:00 2001 From: Sergey Dmitriev Date: Tue, 23 Nov 2021 01:19:46 +0700 Subject: [PATCH 09/14] [SYCL][ESIMD] Add ESIMD-specific IR verification pass (#4965) Signed-off-by: Sergey Dmitriev --- clang/lib/CodeGen/BackendUtil.cpp | 4 + llvm/include/llvm/InitializePasses.h | 1 + llvm/include/llvm/LinkAllPasses.h | 2 + llvm/include/llvm/SYCLLowerIR/ESIMDVerifier.h | 30 +++++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/SYCLLowerIR/CMakeLists.txt | 1 + llvm/lib/SYCLLowerIR/ESIMDVerifier.cpp | 122 ++++++++++++++++++ llvm/tools/opt/opt.cpp | 1 + sycl/test/esimd/esimd_verify.cpp | 14 ++ sycl/test/esimd/spirv_intrins_trans.cpp | 73 +++++------ sycl/test/esimd/vadd.cpp | 42 +++--- 12 files changed, 236 insertions(+), 56 deletions(-) create mode 100644 llvm/include/llvm/SYCLLowerIR/ESIMDVerifier.h create mode 100644 llvm/lib/SYCLLowerIR/ESIMDVerifier.cpp create mode 100644 sycl/test/esimd/esimd_verify.cpp diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d214f03eaa7be..ffae6a10c821d 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -42,6 +42,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" +#include "llvm/SYCLLowerIR/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/CommandLine.h" @@ -849,6 +850,9 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM, FPM.add(createVerifierPass()); // Set up the per-module pass manager. + if (LangOpts.SYCLIsDevice) + MPM.add(createESIMDVerifierPass()); + if (!CodeGenOpts.RewriteMapFiles.empty()) addSymbolRewriterPass(CodeGenOpts, &MPM); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 8cec011926259..1843009a799b1 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -438,6 +438,7 @@ void initializeSYCLLowerESIMDLegacyPassPass(PassRegistry &); void initializeSPIRITTAnnotationsLegacyPassPass(PassRegistry &); void initializeESIMDLowerLoadStorePass(PassRegistry &); void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &); +void initializeESIMDVerifierPass(PassRegistry &); void initializeSYCLLowerWGLocalMemoryLegacyPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 5707a4d8abe1d..e7151ff20d63a 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -37,6 +37,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/SYCLLowerIR/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" #include "llvm/SYCLLowerIR/LowerWGScope.h" @@ -208,6 +209,7 @@ namespace { (void)llvm::createSYCLLowerESIMDPass(); (void)llvm::createESIMDLowerLoadStorePass(); (void)llvm::createESIMDLowerVecArgPass(); + (void)llvm::createESIMDVerifierPass(); (void)llvm::createSPIRITTAnnotationsLegacyPass(); (void)llvm::createSYCLLowerWGLocalMemoryLegacyPass(); std::string buf; diff --git a/llvm/include/llvm/SYCLLowerIR/ESIMDVerifier.h b/llvm/include/llvm/SYCLLowerIR/ESIMDVerifier.h new file mode 100644 index 0000000000000..d5ef20b27e232 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/ESIMDVerifier.h @@ -0,0 +1,30 @@ +//===--------- ESIMDVerifier.h - ESIMD-specific IR verification -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ESIMD verification pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYCLLOWERIR_ESIMDVERIFIER_H +#define LLVM_SYCLLOWERIR_ESIMDVERIFIER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct ESIMDVerifierPass : public PassInfoMixin { + ESIMDVerifierPass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + static bool isRequired() { return true; } +}; + +ModulePass *createESIMDVerifierPass(); + +} // namespace llvm + +#endif // LLVM_SYCLLOWERIR_ESIMDVERIFIER_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 074c1f354d5bd..4ae93b76eadd5 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -75,6 +75,7 @@ #include "llvm/IR/PrintPasses.h" #include "llvm/IR/SafepointIRVerifier.h" #include "llvm/IR/Verifier.h" +#include "llvm/SYCLLowerIR/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 28470c5b3f2d5..1331b75cf9113 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -117,6 +117,7 @@ MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) MODULE_PASS("LowerESIMD", SYCLLowerESIMDPass()) MODULE_PASS("ESIMDLowerVecArg", ESIMDLowerVecArgPass()) +MODULE_PASS("esimd-verifier", ESIMDVerifierPass()) MODULE_PASS("SPIRITTAnnotations", SPIRITTAnnotationsPass()) MODULE_PASS("deadargelim-sycl", DeadArgumentEliminationSYCLPass()) #undef MODULE_PASS diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 900e62b9be5e1..c68f72b1b33fc 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -53,6 +53,7 @@ add_llvm_component_library(LLVMSYCLLowerIR LowerESIMDVLoadVStore.cpp LowerESIMDVecArg.cpp LowerWGLocalMemory.cpp + ESIMDVerifier.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR diff --git a/llvm/lib/SYCLLowerIR/ESIMDVerifier.cpp b/llvm/lib/SYCLLowerIR/ESIMDVerifier.cpp new file mode 100644 index 0000000000000..6535e7f20f652 --- /dev/null +++ b/llvm/lib/SYCLLowerIR/ESIMDVerifier.cpp @@ -0,0 +1,122 @@ +//===---------- ESIMDVerifier.cpp - ESIMD-specific IR verification --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements ESIMD specific IR verification pass. So far it only +// detects invalid API calls in ESIMD context. +// +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/ESIMDVerifier.h" +#include "llvm/Demangle/Demangle.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Regex.h" + +using namespace llvm; + +#define DEBUG_TYPE "esimd-verifier" + +// A list of unsupported functions in ESIMD context. +static const char *IllegalFunctions[] = { + "^cl::sycl::multi_ptr<.+> cl::sycl::accessor<.+>::get_pointer<.+>\\(\\) " + "const"}; + +namespace { + +class ESIMDVerifierImpl { + const Module &M; + +public: + ESIMDVerifierImpl(const Module &M) : M(M) {} + + void verify() { + SmallPtrSet Visited; + SmallVector Worklist; + + auto Add2Worklist = [&Worklist, &Visited](const Function *F) { + if (Visited.insert(F).second) + Worklist.push_back(F); + }; + + // Start with adding all ESIMD functions to the work list. + for (const Function &F : M) + if (F.hasMetadata("sycl_explicit_simd")) + Add2Worklist(&F); + + // Then check ESIMD functions and all functions called from ESIMD context + // for invalid calls. + while (!Worklist.empty()) { + const Function *F = Worklist.pop_back_val(); + for (const Instruction &I : instructions(F)) { + if (auto *CB = dyn_cast(&I)) { + Function *Callee = CB->getCalledFunction(); + if (!Callee) + continue; + + // Demangle called function name and check if it matches any illegal + // function name. Report an error if there is a match. + std::string DemangledName = demangle(Callee->getName().str()); + for (const char *Name : IllegalFunctions) { + Regex NameRE(Name); + assert(NameRE.isValid() && "invalid function name regex"); + if (NameRE.match(DemangledName)) { + std::string ErrorMsg = std::string("function '") + DemangledName + + "' is not supported in ESIMD context"; + F->getContext().emitError(&I, ErrorMsg); + } + } + + // Add callee to the list to be analyzed if it is not a declaration. + if (!Callee->isDeclaration()) + Add2Worklist(Callee); + } + } + } + } +}; + +} // end anonymous namespace + +PreservedAnalyses ESIMDVerifierPass::run(Module &M, ModuleAnalysisManager &AM) { + ESIMDVerifierImpl(M).verify(); + return PreservedAnalyses::all(); +} + +namespace { + +struct ESIMDVerifier : public ModulePass { + static char ID; + + ESIMDVerifier() : ModulePass(ID) { + initializeESIMDVerifierPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) override { + ESIMDVerifierImpl(M).verify(); + return false; + } +}; + +} // end anonymous namespace + +char ESIMDVerifier::ID = 0; + +INITIALIZE_PASS_BEGIN(ESIMDVerifier, DEBUG_TYPE, "ESIMD-specific IR verifier", + false, false) +INITIALIZE_PASS_END(ESIMDVerifier, DEBUG_TYPE, "ESIMD-specific IR verifier", + false, false) + +ModulePass *llvm::createESIMDVerifierPass() { return new ESIMDVerifier(); } diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 165083b251797..e0fe6de4de12a 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -579,6 +579,7 @@ int main(int argc, char **argv) { initializeSPIRITTAnnotationsLegacyPassPass(Registry); initializeESIMDLowerLoadStorePass(Registry); initializeESIMDLowerVecArgLegacyPassPass(Registry); + initializeESIMDVerifierPass(Registry); initializeSYCLLowerWGLocalMemoryLegacyPass(Registry); #ifdef BUILD_EXAMPLES diff --git a/sycl/test/esimd/esimd_verify.cpp b/sycl/test/esimd/esimd_verify.cpp new file mode 100644 index 0000000000000..e9b91294187d2 --- /dev/null +++ b/sycl/test/esimd/esimd_verify.cpp @@ -0,0 +1,14 @@ +// RUN: not %clangxx -fsycl -fsycl-device-only -S %s -o %t 2>&1 | FileCheck %s + +#include + +using namespace cl::sycl; +using namespace sycl::ext::intel::experimental::esimd; + +// CHECK: error: function 'cl::sycl::multi_ptr<{{.+}}> cl::sycl::accessor<{{.+}}>::get_pointer<{{.+}}>() const' is not supported in ESIMD context + +SYCL_EXTERNAL auto +test(accessor &acc) + SYCL_ESIMD_FUNCTION { + return acc.get_pointer(); +} diff --git a/sycl/test/esimd/spirv_intrins_trans.cpp b/sycl/test/esimd/spirv_intrins_trans.cpp index 7f7da671e01fb..8aa6dc17d8d0a 100644 --- a/sycl/test/esimd/spirv_intrins_trans.cpp +++ b/sycl/test/esimd/spirv_intrins_trans.cpp @@ -16,10 +16,10 @@ __attribute__((sycl_kernel)) void kernel(Func kernelFunc) { size_t caller() { - size_t DoNotOpt; - cl::sycl::buffer buf(&DoNotOpt, 1); - uint32_t DoNotOpt32; - cl::sycl::buffer buf32(&DoNotOpt32, 1); + size_t DoNotOpt[1]; + cl::sycl::buffer buf(&DoNotOpt[0], 1); + uint32_t DoNotOpt32[1]; + cl::sycl::buffer buf32(&DoNotOpt32[0], 1); size_t DoNotOptXYZ[3]; cl::sycl::buffer bufXYZ(&DoNotOptXYZ[0], sycl::range<1>(3)); @@ -29,7 +29,7 @@ size_t caller() { auto DoNotOptimize32 = buf32.get_access(cgh); kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalInvocationId_x(); + DoNotOptimize[0] = __spirv_GlobalInvocationId_x(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_x // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() @@ -39,7 +39,7 @@ size_t caller() { // CHECK: {{.*}} call i32 @llvm.genx.group.id.x() kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalInvocationId_y(); + DoNotOptimize[0] = __spirv_GlobalInvocationId_y(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_y // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() @@ -49,7 +49,7 @@ size_t caller() { // CHECK: {{.*}} call i32 @llvm.genx.group.id.y() kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalInvocationId_z(); + DoNotOptimize[0] = __spirv_GlobalInvocationId_z(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalInvocationId_z // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() @@ -58,27 +58,24 @@ size_t caller() { // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 2 // CHECK: {{.*}} call i32 @llvm.genx.group.id.z() - kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalSize_x(); - }); + kernel( + [=]() SYCL_ESIMD_KERNEL { DoNotOptimize[0] = __spirv_GlobalSize_x(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_x // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 0 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 0 - kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalSize_y(); - }); + kernel( + [=]() SYCL_ESIMD_KERNEL { DoNotOptimize[0] = __spirv_GlobalSize_y(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_y // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 1 // CHECK: [[CALL_ESIMD2:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 1 - kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalSize_z(); - }); + kernel( + [=]() SYCL_ESIMD_KERNEL { DoNotOptimize[0] = __spirv_GlobalSize_z(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalSize_z // CHECK: [[CALL_ESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD1]], i32 2 @@ -86,99 +83,99 @@ size_t caller() { // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD2]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalOffset_x(); + DoNotOptimize[0] = __spirv_GlobalOffset_x(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalOffset_x // CHECK: store i64 0 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalOffset_y(); + DoNotOptimize[0] = __spirv_GlobalOffset_y(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalOffset_y // CHECK: store i64 0 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_GlobalOffset_z(); + DoNotOptimize[0] = __spirv_GlobalOffset_z(); }); // CHECK-LABEL: @{{.*}}kernel_GlobalOffset_z // CHECK: store i64 0 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_NumWorkgroups_x(); + DoNotOptimize[0] = __spirv_NumWorkgroups_x(); }); // CHECK-LABEL: @{{.*}}kernel_NumWorkgroups_x // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 0 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_NumWorkgroups_y(); + DoNotOptimize[0] = __spirv_NumWorkgroups_y(); }); // CHECK-LABEL: @{{.*}}kernel_NumWorkgroups_y // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 1 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_NumWorkgroups_z(); + DoNotOptimize[0] = __spirv_NumWorkgroups_z(); }); // CHECK-LABEL: @{{.*}}kernel_NumWorkgroups_z // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.group.count.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_WorkgroupSize_x(); + DoNotOptimize[0] = __spirv_WorkgroupSize_x(); }); // CHECK-LABEL: @{{.*}}kernel_WorkgroupSize_x // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 0 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_WorkgroupSize_y(); + DoNotOptimize[0] = __spirv_WorkgroupSize_y(); }); // CHECK-LABEL: @{{.*}}kernel_WorkgroupSize_y // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 1 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_WorkgroupSize_z(); + DoNotOptimize[0] = __spirv_WorkgroupSize_z(); }); // CHECK-LABEL: @{{.*}}kernel_WorkgroupSize_z // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 2 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_WorkgroupId_x(); + DoNotOptimize[0] = __spirv_WorkgroupId_x(); }); // CHECK-LABEL: @{{.*}}kernel_WorkgroupId_x // CHECK: {{.*}} call i32 @llvm.genx.group.id.x() kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_WorkgroupId_y(); + DoNotOptimize[0] = __spirv_WorkgroupId_y(); }); // CHECK-LABEL: @{{.*}}kernel_WorkgroupId_y // CHECK: {{.*}} call i32 @llvm.genx.group.id.y() kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_WorkgroupId_z(); + DoNotOptimize[0] = __spirv_WorkgroupId_z(); }); // CHECK-LABEL: @{{.*}}kernel_WorkgroupId_z // CHECK: {{.*}} call i32 @llvm.genx.group.id.z() kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_LocalInvocationId_x(); + DoNotOptimize[0] = __spirv_LocalInvocationId_x(); }); // CHECK-LABEL: @{{.*}}kernel_LocalInvocationId_x // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 0 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_LocalInvocationId_y(); + DoNotOptimize[0] = __spirv_LocalInvocationId_y(); }); // CHECK-LABEL: @{{.*}}kernel_LocalInvocationId_y // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() // CHECK: {{.*}} extractelement <3 x i32> [[CALL_ESIMD]], i32 1 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_LocalInvocationId_z(); + DoNotOptimize[0] = __spirv_LocalInvocationId_z(); }); // CHECK-LABEL: @{{.*}}kernel_LocalInvocationId_z // CHECK: [[CALL_ESIMD:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32() @@ -218,8 +215,8 @@ size_t caller() { // CHECK: {{.*}} call i32 @llvm.genx.group.id.z() kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_SubgroupLocalInvocationId(); - *DoNotOptimize32.get_pointer() = __spirv_SubgroupLocalInvocationId() + 3; + DoNotOptimize[0] = __spirv_SubgroupLocalInvocationId(); + DoNotOptimize32[0] = __spirv_SubgroupLocalInvocationId() + 3; }); // CHECK-LABEL: @{{.*}}kernel_SubgroupLocalInvocationId // CHECK: [[ZEXT0:%.*]] = zext i32 0 to i64 @@ -227,8 +224,8 @@ size_t caller() { // CHECK: add i32 0, 3 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_SubgroupSize(); - *DoNotOptimize32.get_pointer() = __spirv_SubgroupSize() + 7; + DoNotOptimize[0] = __spirv_SubgroupSize(); + DoNotOptimize32[0] = __spirv_SubgroupSize() + 7; }); // CHECK-LABEL: @{{.*}}kernel_SubgroupSize // CHECK: [[ZEXT0:%.*]] = zext i32 1 to i64 @@ -236,13 +233,13 @@ size_t caller() { // CHECK: add i32 1, 7 kernel([=]() SYCL_ESIMD_KERNEL { - *DoNotOptimize.get_pointer() = __spirv_SubgroupMaxSize(); - *DoNotOptimize32.get_pointer() = __spirv_SubgroupMaxSize() + 9; + DoNotOptimize[0] = __spirv_SubgroupMaxSize(); + DoNotOptimize32[0] = __spirv_SubgroupMaxSize() + 9; }); // CHECK-LABEL: @{{.*}}kernel_SubgroupMaxSize // CHECK: [[ZEXT0:%.*]] = zext i32 1 to i64 // CHECK: store i64 [[ZEXT0]] // CHECK: add i32 1, 9 }); - return DoNotOpt; + return DoNotOpt[0]; } diff --git a/sycl/test/esimd/vadd.cpp b/sycl/test/esimd/vadd.cpp index c1159fd29cf9d..90dcaf9ab3421 100644 --- a/sycl/test/esimd/vadd.cpp +++ b/sycl/test/esimd/vadd.cpp @@ -54,19 +54,33 @@ int main(void) { constexpr unsigned VL = 32; constexpr unsigned GroupSize = 2; - int A[Size]; - int B[Size]; - int C[Size] = {}; + struct Deleter { + queue Q; + void operator()(int *Ptr) { + if (Ptr) { + sycl::free(Ptr, Q); + } + } + }; + + queue q(ESIMDSelector{}, exception_handler); + + std::unique_ptr BufA(sycl::malloc_shared(Size, q), + Deleter{q}); + std::unique_ptr BufB( + sycl::aligned_alloc_shared(16u, Size, q), Deleter{q}); + std::unique_ptr BufC( + sycl::aligned_alloc_shared(16u, Size, q), Deleter{q}); + + int *A = BufA.get(); + int *B = BufB.get(); + int *C = BufC.get(); for (unsigned i = 0; i < Size; ++i) { A[i] = B[i] = i; } { - cl::sycl::buffer bufA(A, Size); - cl::sycl::buffer bufB(B, Size); - cl::sycl::buffer bufC(C, Size); - // We need that many task groups cl::sycl::range<1> GroupRange{Size / VL}; @@ -75,28 +89,20 @@ int main(void) { cl::sycl::nd_range<1> Range{GroupRange, TaskRange}; - queue q(ESIMDSelector{}, exception_handler); q.submit([&](cl::sycl::handler &cgh) { - auto accA = bufA.get_access(cgh); - auto accB = bufB.get_access(cgh); - auto accC = bufC.get_access(cgh); - cgh.parallel_for( Range, [=](nd_item<1> ndi) SYCL_ESIMD_KERNEL { using namespace sycl::ext::intel::experimental::esimd; - auto pA = accA.get_pointer().get(); - auto pB = accB.get_pointer().get(); - auto pC = accC.get_pointer().get(); int i = ndi.get_global_id(0); constexpr int ESIZE = sizeof(int); simd offsets(0, ESIZE); - simd va = gather(pA + i * VL, offsets); - simd vb = block_load(pB + i * VL); + simd va = gather(A + i * VL, offsets); + simd vb = block_load(B + i * VL); simd vc = va + vb; - block_store(pC + i * VL, vc); + block_store(C + i * VL, vc); }); }); } From 390e1055bcf3a1a22d7fb1b3ca4eb1f87b885625 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 22 Nov 2021 19:33:53 +0000 Subject: [PATCH 10/14] [LIBCLC] Fix config.h include (#5011) This is fixing building libclc for `amdgcn-amdhsa`. It showed up in the `ldexp.cl` builtin. The issue is that `_CLC_DECL` is declared in `func.h` and in other files `config.h` relies on other `func.h` being included first. This is confusing and error prone so include `func.h` directly in `config.h` instead. --- libclc/generic/include/config.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libclc/generic/include/config.h b/libclc/generic/include/config.h index 2994199b02c5b..fbfa22e6b6f6b 100644 --- a/libclc/generic/include/config.h +++ b/libclc/generic/include/config.h @@ -20,6 +20,8 @@ * THE SOFTWARE. */ +#include "func.h" + _CLC_DECL bool __clc_subnormals_disabled(); _CLC_DECL bool __clc_fp16_subnormals_supported(); _CLC_DECL bool __clc_fp32_subnormals_supported(); From 782bea809ae0f1103de3c405fa0ada9b9abe7383 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 23 Nov 2021 03:14:54 +0000 Subject: [PATCH 11/14] [SYCL][HIP] Fix unused argument warning in unsupported function (#5010) This was producing warnings when building with clang, so explicitely mark the arguments as unused by casting them to `void`. --- sycl/plugins/hip/pi_hip.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 2846e0139e2fb..d5ce87e8b410d 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -567,6 +567,7 @@ pi_result _pi_program::build_program(const char *build_options) { /// has_kernel method, so an alternative would be to move the has_kernel /// query to PI and use hipModuleGetFunction to check for a kernel. std::string getKernelNames(pi_program program) { + (void)program; cl::sycl::detail::pi::die("getKernelNames not implemented"); return {}; } @@ -2949,6 +2950,15 @@ pi_result hip_piProgramLink(pi_context context, pi_uint32 num_devices, void (*pfn_notify)(pi_program program, void *user_data), void *user_data, pi_program *ret_program) { + (void)context; + (void)num_devices; + (void)device_list; + (void)options; + (void)num_input_programs; + (void)input_programs; + (void)pfn_notify; + (void)user_data; + (void)ret_program; cl::sycl::detail::pi::die( "hip_piProgramLink: linking not supported with hip backend"); return {}; From 4fd11c7b2e4399ba40591a2772f611bf05b61eec Mon Sep 17 00:00:00 2001 From: qichaogu Date: Tue, 23 Nov 2021 11:15:53 +0800 Subject: [PATCH 12/14] [SYCL][NFC] Update tests for device split code mode (#5000) CHK-NO-SPLIT-NOT is not able to actually validate tests due to defect in the pattern. Update the macro to make it work effectively. Signed-off-by: Qichao Gu --- clang/test/Driver/sycl-offload-with-split.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-offload-with-split.c b/clang/test/Driver/sycl-offload-with-split.c index c7f12c9f0c758..7ab6c50470197 100644 --- a/clang/test/Driver/sycl-offload-with-split.c +++ b/clang/test/Driver/sycl-offload-with-split.c @@ -310,7 +310,7 @@ // RUN: | FileCheck %s -check-prefixes=CHK-NO-SPLIT // RUN: %clang_cl -### -fsycl -fsycl-device-code-split -fsycl-device-code-split=off %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-NO-SPLIT -// CHK-NO-SPLIT-NOT: sycl-post-link{{.*}} -split{{.*}} +// CHK-NO-SPLIT-NOT: sycl-post-link{{.*}} "-split={{.*}} // Check no device code split mode is passed to sycl-post-link when -fsycl-device-code-split is not set and the target is FPGA // RUN: %clang -### -fsycl -fsycl-targets=spir64_fpga-unknown-unknown %s 2>&1 | FileCheck %s -check-prefixes=CHK-NO-SPLIT From e2fdf15d47c1dc279e67d7c7104fbb9138cd5a67 Mon Sep 17 00:00:00 2001 From: Gordon Brown Date: Tue, 23 Nov 2021 04:36:43 +0000 Subject: [PATCH 13/14] Update documentation for HIP backend (#5004) Update the HIP sections of the GetStartedGuide and EnvironmentVariables documentation with details regarding usage, requirements and current implementation status. --- sycl/doc/EnvironmentVariables.md | 1 + sycl/doc/GetStartedGuide.md | 44 +++++++++++++++++++------------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index e0ea942ea552d..6de220275d116 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -40,6 +40,7 @@ The value of this environment variable is a comma separated list of filters, whe - `level_zero` - `opencl` - `cuda` +- `hip` - `*` Possible values of `device_type` are: diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index 1daf6b09a1944..5d800fbf28c0a 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -187,22 +187,25 @@ LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DPCPP_HOME/llvm/build/lib ./a.out ### Build DPC++ toolchain with support for HIP AMD -There is experimental support for DPC++ for HIP devices. +There is experimental support for DPC++ for HIP on AMD devices. Note as this is +still experimental and there is no continuous integration for this yet there +are therefore no guarantees for supported platforms or configurations. To enable support for HIP devices, follow the instructions for the Linux DPC++ toolchain, but add the `--hip` flag to `configure.py` Enabling this flag requires an installation of -ROCm 4.2.0 on the system, refer to +ROCm on the system, for instruction on how to install this refer to [AMD ROCm Installation Guide for Linux](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html). -Currently, the only combination tested is Ubuntu 18.04 with ROCm 4.2.0 using a Vega20 gfx906. +Currently, this has only been tried on Linux, with ROCm 4.2.0 or 4.3.0 and +using the MI50 (gfx906) and MI100 (gfx908) devices. -[LLD](https://llvm.org/docs/AMDGPUUsage.html) is necessary for the AMD GPU compilation chain. -The AMDGPU backend generates a standard ELF [ELF] relocatable code object that can be linked by lld to -produce a standard ELF shared code object which can be loaded and executed on an AMDGPU target. -So if you want to support HIP AMD, you should also build the lld project. -[LLD Build Guide](https://lld.llvm.org/) +[LLD](https://llvm.org/docs/AMDGPUUsage.html) is necessary for the AMDGPU compilation chain. +The AMDGPU backend generates a standard ELF [ELF] relocatable code object that can be linked by lld to +produce a standard ELF shared code object which can be loaded and executed on an AMDGPU target. +The LLD project is enabled by default when configuring for HIP. For more details +on building LLD refer to [LLD Build Guide](https://lld.llvm.org/). The following CMake variables can be updated to change where CMake is looking for the HIP installation: @@ -216,7 +219,9 @@ for the HIP installation: ### Build DPC++ toolchain with support for HIP NVIDIA -There is experimental support for DPC++ for using HIP on NVIDIA devices. +There is experimental support for DPC++ for HIP on Nvidia devices. Note as this +is still experimental and there is no continuous integration for this yet there +are therefore no guarantees for supported platforms or configurations. This is a compatibility feature and the [CUDA backend](#build-dpc-toolchain-with-support-for-nvidia-cuda) should be preferred to run on NVIDIA GPUs. @@ -230,8 +235,8 @@ Enabling this flag requires HIP to be installed, more specifically as well as CUDA to be installed, see [NVIDIA CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html). -Currently this was only tested on Linux with ROCm 4.2, CUDA 11 and a GeForce GTX -1060 card. +Currently, this has only been tried on Linux, with ROCm 4.2.0 or 4.3.0, with +CUDA 11, and using a GeForce 1060 device. ### Build DPC++ toolchain with support for ESIMD CPU Emulation @@ -810,13 +815,16 @@ which contains all the symbols required. ### HIP back-end limitations -* For supported Operating Systems, please refer to the [Supported Operating Systems](https://github.com/RadeonOpenCompute/ROCm#supported-operating-systems) -* The only combination tested is Ubuntu 18.04 with ROCm 4.2 using a Vega20 gfx906. -* Judging from the current [test](https://github.com/zjin-lcf/oneAPI-DirectProgramming) results, - there is still a lot of room for improvement in HIP back-end support. The current problems include three aspects. - The first one is at compile time: the `barrier` and `atomic` keywords are not supported. - The second is at runtime: when calling `hipMemcpyDtoHAsync` HIP API, the program will cause an exception if the input data size is too large. - The third is calculation accuracy: the HIP backend has obvious errors in the calculation results of some float type operators +* Requires a ROCm compatible operating system, for full details of supported + Operating System for ROCm, please refer to the + [ROCm Supported Operating Systems](https://github.com/RadeonOpenCompute/ROCm#supported-operating-systems). +* Has only been tried with ROCm 4.2.0 and 4.3.0. +* Has only been tested using the MI50 (gfx906) and MI100 (gfx908) devices. +* Support is still experimental so not all of the tests are currently passing + and many of the built-in function are not yet implemented. +* Additionally there is no continuous integration yet so no guarantee can be + made for support platforms or configurations +* Global offsets are currently not supported. ## Find More From 0e28541f25c6dfc3b4a4e969102c3c2a36901c4b Mon Sep 17 00:00:00 2001 From: Alexander Batashev Date: Tue, 23 Nov 2021 09:23:45 +0300 Subject: [PATCH 14/14] [XPTI][SYCL] Minor fixes to CMakeLists.txt (#5003) - improve headers copy mechanisms to ensure it is done only when files are changed; - exclude XPTI unit tests from all targets by default. --- sycl/CMakeLists.txt | 14 ++++++++------ xpti/CMakeLists.txt | 6 ++++-- xptifw/unit_test/CMakeLists.txt | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index ca6a73ed7d418..87883dc217969 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -118,17 +118,19 @@ configure_file("${feature_header}.in" "${feature_header}") # TODO: detect and process remove header/directory case file(GLOB_RECURSE HEADERS_IN_SYCL_DIR CONFIGURE_DEPENDS "${sycl_inc_dir}/sycl/*") file(GLOB_RECURSE HEADERS_IN_CL_DIR CONFIGURE_DEPENDS "${sycl_inc_dir}/CL/*") +string(REPLACE "${sycl_inc_dir}" "${SYCL_INCLUDE_BUILD_DIR}" + OUT_HEADERS_IN_SYCL_DIR "${HEADERS_IN_SYCL_DIR}") +string(REPLACE "${sycl_inc_dir}" "${SYCL_INCLUDE_BUILD_DIR}" + OUT_HEADERS_IN_CL_DIR "${HEADERS_IN_CL_DIR}") # Copy SYCL headers from sources to build directory add_custom_target(sycl-headers - DEPENDS ${SYCL_INCLUDE_BUILD_DIR}/sycl - ${SYCL_INCLUDE_BUILD_DIR}/sycl/sycl.hpp - ${SYCL_INCLUDE_BUILD_DIR}/sycl/CL) + DEPENDS ${OUT_HEADERS_IN_SYCL_DIR} + ${OUT_HEADERS_IN_CL_DIR}) add_custom_command( - OUTPUT ${SYCL_INCLUDE_BUILD_DIR}/sycl - ${SYCL_INCLUDE_BUILD_DIR}/sycl/sycl.hpp - ${SYCL_INCLUDE_BUILD_DIR}/sycl/CL + OUTPUT ${OUT_HEADERS_IN_SYCL_DIR} + ${OUT_HEADERS_IN_CL_DIR} DEPENDS ${HEADERS_IN_SYCL_DIR} ${HEADERS_IN_CL_DIR} COMMAND ${CMAKE_COMMAND} -E copy_directory ${sycl_inc_dir}/sycl ${SYCL_INCLUDE_BUILD_DIR}/sycl diff --git a/xpti/CMakeLists.txt b/xpti/CMakeLists.txt index cc57d795e7073..b5e8860be2deb 100644 --- a/xpti/CMakeLists.txt +++ b/xpti/CMakeLists.txt @@ -78,11 +78,13 @@ add_subdirectory(src) if (LLVM_BINARY_DIR) file(GLOB_RECURSE XPTI_HEADERS_LIST CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/include/xpti/*") + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}" "${LLVM_BINARY_DIR}" + XPTI_HEADERS_OUT_LIST "${XPTI_HEADERS_LIST}") add_custom_target(xpti-headers - DEPENDS ${LLVM_BINARY_DIR}/include/xpti) + DEPENDS ${XPTI_HEADERS_OUT_LIST}) add_custom_command( - OUTPUT ${LLVM_BINARY_DIR}/include/xpti + OUTPUT ${XPTI_HEADERS_OUT_LIST} DEPENDS ${XPTI_HEADERS_LIST} COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/include/xpti diff --git a/xptifw/unit_test/CMakeLists.txt b/xptifw/unit_test/CMakeLists.txt index e1689356df30c..5767a3197f7d5 100644 --- a/xptifw/unit_test/CMakeLists.txt +++ b/xptifw/unit_test/CMakeLists.txt @@ -38,7 +38,7 @@ if (NOT DEFINED LLVM_EXTERNAL_XPTIFW_SOURCE_DIR) endif() endif() -add_executable(XPTIFWUnitTests +add_executable(XPTIFWUnitTests EXCLUDE_FROM_ALL xpti_api_tests.cpp xpti_correctness_tests.cpp )