diff --git a/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp b/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp index ea2cd3802fd..d6cdad2ab23 100644 --- a/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp +++ b/tpls/mdspan/include/experimental/__p0009_bits/layout_stride.hpp @@ -605,7 +605,7 @@ struct layout_stride { ) MDSPAN_INLINE_FUNCTION friend constexpr bool operator!=(const mapping& x, const StridedLayoutMapping& y) noexcept { - return not (x == y); + return !(x == y); } MDSPAN_TEMPLATE_REQUIRES( @@ -643,8 +643,8 @@ constexpr void validate_strides(with_rank<0>, Layout, const Extents&, const Mapp template constexpr void validate_strides(with_rank, Layout, const Extents& ext, const Mapping& other) { - static_assert(std::is_same::value and - (std::is_same::value or + static_assert(std::is_same::value && + (std::is_same::value || std::is_same::value) , "This function is only intended to validate construction of " "a layout_left or layout_right mapping from a layout_stride mapping."); @@ -657,7 +657,7 @@ constexpr void validate_strides(with_rank, Layout, const Extents& ext, const const std::size_t s = is_left ? 
r : N - 1 - r; MDSPAN_IMPL_PRECONDITION(common_integral_compare(expected_stride, other.stride(s)) - and "invalid strides for layout_{left,right}"); + && "invalid strides for layout_{left,right}"); expected_stride *= ext.extent(s); } diff --git a/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp b/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp index 30209a6648b..f7b9374417f 100644 --- a/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp +++ b/tpls/mdspan/include/experimental/__p0009_bits/macros.hpp @@ -151,7 +151,7 @@ namespace detail { template MDSPAN_FUNCTION constexpr void precondition(const char* cond, const char* file, unsigned line) { - if (not check) { return; } + if (!check) { return; } // in case the macro doesn't use the arguments for custom macros (void) cond; (void) file; @@ -164,7 +164,7 @@ MDSPAN_FUNCTION constexpr void precondition(const char* cond, const char* file, #define MDSPAN_IMPL_PRECONDITION(...) \ do { \ - if (not (__VA_ARGS__)) { \ + if (!(__VA_ARGS__)) { \ MDSPAN_IMPL_STANDARD_NAMESPACE::detail::precondition(#__VA_ARGS__, __FILE__, __LINE__); \ } \ } while (0) diff --git a/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp b/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp index af4848248d6..23114aa5506 100644 --- a/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp +++ b/tpls/mdspan/include/experimental/__p0009_bits/mdspan.hpp @@ -323,7 +323,7 @@ class mdspan #endif // MDSPAN_USE_PAREN_OPERATOR MDSPAN_INLINE_FUNCTION constexpr size_type size() const noexcept { - return __impl::__size(*this); + return static_cast(__impl::__size(*this)); }; MDSPAN_INLINE_FUNCTION constexpr bool empty() const noexcept { diff --git a/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp b/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp index ca821176f87..560b7b5026d 100644 --- a/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp +++ b/tpls/mdspan/include/experimental/__p0009_bits/utility.hpp @@ 
-16,7 +16,7 @@ using with_rank = std::integral_constant; template constexpr bool common_integral_compare(I1 x, I2 y) { - static_assert(std::is_integral::value and + static_assert(std::is_integral::value && std::is_integral::value, ""); using I = std::common_type_t; diff --git a/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp b/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp index cf1bdd1e56f..1cb098a1368 100644 --- a/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp +++ b/tpls/mdspan/include/experimental/__p2630_bits/submdspan_mapping.hpp @@ -182,6 +182,21 @@ struct deduce_layout_left_submapping< } }; +// We are reusing the same thing for layout_left and layout_left_padded +// For layout_left as source StaticStride is static_extent(0) +template +struct compute_s_static_layout_left { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx>0 && Idx<=NumGaps ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + } // namespace detail // Actual submdspan mapping call @@ -202,14 +217,6 @@ layout_left::mapping::submdspan_mapping_impl( std::make_index_sequence, SliceSpecifiers...>; - using dst_layout_t = std::conditional_t< - deduce_layout::layout_left_value(), layout_left, - std::conditional_t< - deduce_layout::layout_left_padded_value(), - MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded, - layout_stride>>; - using dst_mapping_t = typename dst_layout_t::template mapping; - // Figure out if any slice's lower bound equals the corresponding extent. 
// If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. const bool out_of_bounds = @@ -218,17 +225,19 @@ layout_left::mapping::submdspan_mapping_impl( out_of_bounds ? this->required_span_size() : this->operator()(detail::first_of(slices)...)); - if constexpr (std::is_same_v) { + if constexpr (deduce_layout::layout_left_value()) { // layout_left case + using dst_mapping_t = typename layout_left::template mapping; return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; - } else if constexpr (std::is_same_v>) { + } else if constexpr (deduce_layout::layout_left_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; return submdspan_mapping_result{ dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; } else { // layout_stride case + using dst_mapping_t = typename layout_stride::mapping; auto inv_map = detail::inv_map_rank(std::integral_constant(), std::index_sequence<>(), slices...); return submdspan_mapping_result { @@ -253,6 +262,77 @@ layout_left::mapping::submdspan_mapping_impl( #endif } +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? 
this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else { // general case + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. This fixes LWG Issue 4060. + // figure out sub layout type + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_left_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_left_value() && dst_ext_t::rank() == 1) { // getting rank-1 from leftmost + using dst_mapping_t = typename layout_left::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_left_padded_value()) { // can keep layout_left_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_left::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_left_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(1 + deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 does not have +// the issue But Clang-CUDA also doesn't accept the use of 
deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#else + std::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + //********************************** // layout_right submdspan_mapping //********************************* @@ -322,6 +402,21 @@ struct deduce_layout_right_submapping< } }; +// We are reusing the same thing for layout_right and layout_right_padded +// For layout_right as source StaticStride is static_extent(Rank-1) +template +struct compute_s_static_layout_right { + // Neither StaticStride nor any of the provided extents can be zero. + // StaticStride can never be zero, the static_extents we are looking at are associated with + // integral slice specifiers - which wouldn't be valid for zero extent + template + MDSPAN_INLINE_FUNCTION + static constexpr size_t value(std::index_sequence) { + size_t val = ((Idx >= Extents::rank() - 1 - NumGaps && Idx < Extents::rank() - 1 ? (Extents::static_extent(Idx) == dynamic_extent?0:Extents::static_extent(Idx)) : 1) * ... * (StaticStride == dynamic_extent?0:StaticStride)); + return val == 0?dynamic_extent:val; + } +}; + } // namespace detail // Actual submdspan mapping call @@ -342,14 +437,6 @@ layout_right::mapping::submdspan_mapping_impl( std::make_index_sequence, SliceSpecifiers...>; - using dst_layout_t = std::conditional_t< - deduce_layout::layout_right_value(), layout_right, - std::conditional_t< - deduce_layout::layout_right_padded_value(), - MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded, - layout_stride>>; - using dst_mapping_t = typename dst_layout_t::template mapping; - // Figure out if any slice's lower bound equals the corresponding extent. // If so, bypass evaluating the layout mapping. 
This fixes LWG Issue 4060. const bool out_of_bounds = @@ -358,20 +445,21 @@ layout_right::mapping::submdspan_mapping_impl( out_of_bounds ? this->required_span_size() : this->operator()(detail::first_of(slices)...)); - if constexpr (std::is_same_v) { + if constexpr (deduce_layout::layout_right_value()) { // layout_right case + using dst_mapping_t = typename layout_right::mapping; return submdspan_mapping_result{dst_mapping_t(dst_ext), offset}; - } else if constexpr (std::is_same_v< - dst_layout_t, - MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded< - dynamic_extent>>) { + } else if constexpr (deduce_layout::layout_right_padded_value()) { + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; return submdspan_mapping_result{ dst_mapping_t(dst_ext, stride(src_ext_t::rank() - 2 - deduce_layout::gap_len)), offset}; } else { // layout_stride case + using dst_mapping_t = typename layout_stride::mapping; auto inv_map = detail::inv_map_rank(std::integral_constant(), std::index_sequence<>(), slices...); return submdspan_mapping_result { @@ -396,6 +484,77 @@ layout_right::mapping::submdspan_mapping_impl( #endif } +template +template +template +MDSPAN_INLINE_FUNCTION constexpr auto +MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::mapping::submdspan_mapping_impl( + SliceSpecifiers... slices) const { + + // compute sub extents + using src_ext_t = Extents; + auto dst_ext = submdspan_extents(extents(), slices...); + using dst_ext_t = decltype(dst_ext); + + if constexpr (Extents::rank() == 0) { // rank-0 case + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{*this, 0}; + } else { + // Figure out if any slice's lower bound equals the corresponding extent. + // If so, bypass evaluating the layout mapping. 
This fixes LWG Issue 4060. + // figure out sub layout type + const bool out_of_bounds = + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::any_slice_out_of_bounds(this->extents(), slices...); + auto offset = static_cast( + out_of_bounds ? this->required_span_size() + : this->operator()(MDSPAN_IMPL_STANDARD_NAMESPACE::detail::first_of(slices)...)); + if constexpr (dst_ext_t::rank() == 0) { // result rank-0 + using dst_mapping_t = typename layout_right::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else { // general case + using deduce_layout = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::deduce_layout_right_submapping< + typename dst_ext_t::index_type, dst_ext_t::rank(), + decltype(std::make_index_sequence()), + SliceSpecifiers...>; + + if constexpr (deduce_layout::layout_right_value() && dst_ext_t::rank() == 1) { // getting rank-1 from rightmost + using dst_mapping_t = typename layout_right::template mapping; + return submdspan_mapping_result{dst_mapping_t{dst_ext}, offset}; + } else if constexpr (deduce_layout::layout_right_padded_value()) { // can keep layout_right_padded + constexpr size_t S_static = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::compute_s_static_layout_right::value(std::make_index_sequence()); + using dst_mapping_t = typename MDSPAN_IMPL_PROPOSED_NAMESPACE::layout_right_padded::template mapping; + return submdspan_mapping_result{ + dst_mapping_t(dst_ext, stride(Extents::rank() - 2 - deduce_layout::gap_len)), offset}; + } else { // layout_stride + auto inv_map = MDSPAN_IMPL_STANDARD_NAMESPACE::detail::inv_map_rank(std::integral_constant(), + std::index_sequence<>(), slices...); + using dst_mapping_t = typename layout_stride::template mapping; + return submdspan_mapping_result { + dst_mapping_t(dst_ext, + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::construct_sub_strides( + *this, inv_map, +// HIP needs deduction guides to have markups so we need to be explicit +// NVCC 11.0 has a bug with deduction guide here, tested that 11.2 
does not have +// the issue But Clang-CUDA also doesn't accept the use of deduction guide so +// disable it for CUDA alltogether +#if defined(_MDSPAN_HAS_HIP) || defined(_MDSPAN_HAS_CUDA) + std::tuple{ + MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#else + std::tuple{MDSPAN_IMPL_STANDARD_NAMESPACE::detail::stride_of(slices)...})), +#endif + offset + }; + } + } + } + + +#if defined(__NVCC__) && !defined(__CUDA_ARCH__) && defined(__GNUC__) + __builtin_unreachable(); +#endif +} + //********************************** // layout_stride submdspan_mapping //********************************* diff --git a/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp b/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp index 99e24fa450a..1291c4d7fc6 100644 --- a/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp +++ b/tpls/mdspan/include/experimental/__p2642_bits/layout_padded.hpp @@ -73,7 +73,7 @@ struct static_array_type_for_padded_extent using extents_type = _Extents; using type = ::MDSPAN_IMPL_STANDARD_NAMESPACE::detail::maybe_static_array< index_type, size_t, dynamic_extent, - detail::get_actual_static_padding_value()>; }; @@ -221,7 +221,7 @@ class layout_left_padded::mapping { #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping& operator=(const mapping&) noexcept = default; /** * Initializes the mapping with the given extents. @@ -497,10 +497,12 @@ class layout_left_padded::mapping { // [mdspan.submdspan.mapping], submdspan mapping specialization template + MDSPAN_INLINE_FUNCTION constexpr auto submdspan_mapping_impl( SliceSpecifiers... slices) const; template + MDSPAN_INLINE_FUNCTION friend constexpr auto submdspan_mapping( const mapping& src, SliceSpecifiers... 
slices) { return src.submdspan_mapping_impl(slices...); @@ -582,7 +584,7 @@ class layout_right_padded::mapping { #endif MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping(const mapping&) noexcept = default; - MDSPAN_INLINE_FUNCTION_DEFAULTED mapping& operator=(const mapping&) noexcept = default; + MDSPAN_INLINE_FUNCTION_DEFAULTED constexpr mapping& operator=(const mapping&) noexcept = default; /** * Initializes the mapping with the given extents. @@ -847,6 +849,19 @@ class layout_right_padded::mapping { return !(left == right); } #endif + + // [mdspan.submdspan.mapping], submdspan mapping specialization + template + MDSPAN_INLINE_FUNCTION + constexpr auto submdspan_mapping_impl( + SliceSpecifiers... slices) const; + + template + MDSPAN_INLINE_FUNCTION + friend constexpr auto submdspan_mapping( + const mapping& src, SliceSpecifiers... slices) { + return src.submdspan_mapping_impl(slices...); + } }; } } diff --git a/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp b/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp index b5eaac952bc..18daa28cc68 100644 --- a/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp +++ b/tpls/mdspan/include/experimental/__p2642_bits/layout_padded_fwd.hpp @@ -102,7 +102,7 @@ constexpr void check_padded_layout_converting_constructor_mandates(MDSPAN_IMPL_S (extents_type::static_extent(idx) != dynamic_extent) && (padding_value != dynamic_extent); - static_assert(not statically_determinable or + static_assert(!statically_determinable || (padding_value == 0 ? _LayoutExtentsType::static_extent(idx) == 0 : _LayoutExtentsType::static_extent(idx) % padding_value == 0),