Skip to content

Commit

Permalink
[SYCL] [FPGA] Create experimental headers for FPGA latency control (#…
Browse files Browse the repository at this point in the history
…5066)

Create experimental header files that provide user API for
FPGA latency control feature.

experimental/fpga_lsu.hpp and experimental/pipes.hpp are
simply the original version plus latency control API
described in the extension documents.

experimental/fpga_utils.hpp provides utility functions used
by experimental LSU and pipe headers.
fpga_extensions.hpp will include new experimental LSU and
pipe headers.

Related PR for extension documents: #5027

Test: intel/llvm-test-suite#596
  • Loading branch information
shuoniu-intel authored Dec 8, 2021
1 parent 35729a7 commit a3e9aab
Show file tree
Hide file tree
Showing 4 changed files with 497 additions and 0 deletions.
177 changes: 177 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/fpga_lsu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//==-------------- fpga_lsu.hpp --- SYCL FPGA LSU Extensions ---------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include "fpga_utils.hpp"
#include <CL/sycl/detail/defines.hpp>
#include <CL/sycl/pointers.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace intel {
namespace experimental {

constexpr uint8_t BURST_COALESCE = 0x1;
constexpr uint8_t CACHE = 0x2;
constexpr uint8_t STATICALLY_COALESCE = 0x4;
constexpr uint8_t PREFETCH = 0x8;

template <int32_t _N> struct burst_coalesce_impl {
static constexpr int32_t value = _N;
static constexpr int32_t default_value = 0;
};

template <int32_t _N> struct cache {
static constexpr int32_t value = _N;
static constexpr int32_t default_value = 0;
};

template <int32_t _N> struct prefetch_impl {
static constexpr int32_t value = _N;
static constexpr int32_t default_value = 0;
};

template <int32_t _N> struct statically_coalesce_impl {
static constexpr int32_t value = _N;
static constexpr int32_t default_value = 1;
};

template <bool _B> using burst_coalesce = burst_coalesce_impl<_B>;
template <bool _B> using prefetch = prefetch_impl<_B>;
template <bool _B> using statically_coalesce = statically_coalesce_impl<_B>;

template <class... _mem_access_params> class lsu final {
public:
lsu() = delete;

template <class... _Params, typename _T, access::address_space _space>
static _T load(sycl::multi_ptr<_T, _space> Ptr) {
check_space<_space>();
check_load();
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
static constexpr auto _anchor_id =
_GetValue<int32_t, latency_anchor_id, _Params...>::value;
static constexpr auto _constraint =
_GetValue3<int32_t, type, int32_t, latency_constraint,
_Params...>::value;

static constexpr int32_t _target_anchor = std::get<0>(_constraint);
static constexpr type _control_type = std::get<1>(_constraint);
static constexpr int32_t _cycle = std::get<2>(_constraint);
int32_t _type = 0; // Default: _control_type == type::none
if constexpr (_control_type == type::exact) {
_type = 1;
} else if constexpr (_control_type == type::max) {
_type = 2;
} else if constexpr (_control_type == type::min) {
_type = 3;
}

return *__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor,
_type, _cycle);
#else
return *Ptr;
#endif
}

template <class... _Params, typename _T, access::address_space _space>
static void store(sycl::multi_ptr<_T, _space> Ptr, _T Val) {
check_space<_space>();
check_store();
#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
static constexpr auto _anchor_id =
_GetValue<int32_t, latency_anchor_id, _Params...>::value;
static constexpr auto _constraint =
_GetValue3<int32_t, type, int32_t, latency_constraint,
_Params...>::value;

static constexpr int32_t _target_anchor = std::get<0>(_constraint);
static constexpr type _control_type = std::get<1>(_constraint);
static constexpr int32_t _cycle = std::get<2>(_constraint);
int32_t _type = 0; // Default: _control_type == type::none
if constexpr (_control_type == type::exact) {
_type = 1;
} else if constexpr (_control_type == type::max) {
_type = 2;
} else if constexpr (_control_type == type::min) {
_type = 3;
}

*__latency_control_mem_wrapper((_T *)Ptr, _anchor_id, _target_anchor, _type,
_cycle) = Val;
#else
*Ptr = Val;
#endif
}

private:
static constexpr int32_t _burst_coalesce_val =
_GetValue<int32_t, burst_coalesce_impl, _mem_access_params...>::value;
static constexpr uint8_t _burst_coalesce =
_burst_coalesce_val == 1 ? BURST_COALESCE : 0;

static constexpr int32_t _cache_val =
_GetValue<int32_t, cache, _mem_access_params...>::value;
static constexpr uint8_t _cache = (_cache_val > 0) ? CACHE : 0;

static constexpr int32_t _statically_coalesce_val =
_GetValue<int32_t, statically_coalesce_impl,
_mem_access_params...>::value;
static constexpr uint8_t _dont_statically_coalesce =
_statically_coalesce_val == 0 ? STATICALLY_COALESCE : 0;

static constexpr int32_t _prefetch_val =
_GetValue<int32_t, prefetch_impl, _mem_access_params...>::value;
static constexpr uint8_t _prefetch = _prefetch_val ? PREFETCH : 0;

static_assert(_cache_val >= 0, "cache size parameter must be non-negative");

template <access::address_space _space> static void check_space() {
static_assert(_space == access::address_space::global_space ||
_space == access::address_space::global_device_space ||
_space == access::address_space::global_host_space,
"lsu controls are only supported for global_ptr, "
"device_ptr, and host_ptr objects");
}

static void check_load() {
static_assert(_cache == 0 || _burst_coalesce == BURST_COALESCE,
"unable to implement a cache without a burst coalescer");
static_assert(_prefetch == 0 || _burst_coalesce == 0,
"unable to implement a prefetcher and a burst coalescer "
"simulataneously");
static_assert(
_prefetch == 0 || _cache == 0,
"unable to implement a prefetcher and a cache simulataneously");
}
static void check_store() {
static_assert(_cache == 0, "unable to implement a store LSU with a cache.");
static_assert(_prefetch == 0,
"unable to implement a store LSU with a prefetcher.");
}

#if defined(__SYCL_DEVICE_ONLY__) && __has_builtin(__builtin_intel_fpga_mem)
// FPGA BE will recognize this function and extract its arguments.
// TODO: Pass latency control params via __builtin_intel_fpga_mem when ready.
template <typename _T>
static _T *__latency_control_mem_wrapper(_T *Ptr, int32_t AnchorID,
int32_t TargetAnchor, int32_t Type,
int32_t Cycle) {
return __builtin_intel_fpga_mem(
Ptr, _burst_coalesce | _cache | _dont_statically_coalesce | _prefetch,
_cache_val);
}
#endif
};

} // namespace experimental
} // namespace intel
} // namespace ext
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
105 changes: 105 additions & 0 deletions sycl/include/sycl/ext/intel/experimental/fpga_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
//==------------- fpga_utils.hpp --- SYCL FPGA Reg Extensions --------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/detail/defines.hpp>
#include <CL/sycl/detail/stl_type_traits.hpp>
#include <CL/sycl/stl.hpp>
#include <tuple>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace intel {
namespace experimental {

enum class type {
none, // default
exact,
max,
min
};

template <int32_t _N> struct latency_anchor_id {
static constexpr int32_t value = _N;
static constexpr int32_t default_value = -1;
};

template <int32_t _N1, type _N2, int32_t _N3> struct latency_constraint {
static constexpr std::tuple<int32_t, type, int32_t> value = {_N1, _N2, _N3};
static constexpr std::tuple<int32_t, type, int32_t> default_value = {
0, type::none, 0};
};

using ignoreParam_int_t = int32_t;
constexpr ignoreParam_int_t IgnoreParamInt{};
using ignoreParam_enum_t = type;
constexpr ignoreParam_enum_t IgnoreParamEnum{};

template <class _VType, class _T> struct _ValueExtractorImp {
static constexpr auto _First = _T::value;
static constexpr auto _Second = IgnoreParamEnum;
static constexpr auto _Third = IgnoreParamInt;
};

template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird, class _T>
struct _ValueExtractorImp<
const std::tuple<_VTypeFirst, _VTypeSecond, _VTypeThird>, _T> {
static constexpr auto _First = std::get<0>(_T::value);
static constexpr auto _Second = std::get<1>(_T::value);
static constexpr auto _Third = std::get<2>(_T::value);
};

template <class _T>
struct _ValueExtractor : _ValueExtractorImp<decltype(_T::value), _T> {};

template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird,
template <_VTypeFirst, _VTypeSecond, _VTypeThird> class _Type,
class _T>
struct _MatchType
: std::is_same<
_Type<_ValueExtractor<_T>::_First, _ValueExtractor<_T>::_Second,
_ValueExtractor<_T>::_Third>,
_T> {};

template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird,
template <_VTypeFirst, _VTypeSecond, _VTypeThird> class _Type,
class... _T>
struct _GetValue3 {
static constexpr auto value =
_Type<_VTypeFirst{}, _VTypeSecond{}, _VTypeThird{}>::default_value;
};

template <class _VTypeFirst, class _VTypeSecond, class _VTypeThird,
template <_VTypeFirst, _VTypeSecond, _VTypeThird> class _Type,
class _T1, class... _T>
struct _GetValue3<_VTypeFirst, _VTypeSecond, _VTypeThird, _Type, _T1, _T...> {
static constexpr auto value = std::conditional<
_MatchType<_VTypeFirst, _VTypeSecond, _VTypeThird, _Type, _T1>::value,
_T1, _GetValue3<_VTypeFirst, _VTypeSecond, _VTypeThird, _Type, _T...>>::
type::value;
};

template <class _VType, template <_VType> class _Type, class... _T>
struct _GetValue {
private:
template <_VType _V1, ignoreParam_enum_t, ignoreParam_int_t>
using _Type2 = _Type<_V1>;

public:
static constexpr auto value =
_GetValue3<_VType, ignoreParam_enum_t, ignoreParam_int_t, _Type2,
_T...>::value;
};

} // namespace experimental
} // namespace intel
} // namespace ext
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
Loading

0 comments on commit a3e9aab

Please sign in to comment.