Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DRAFT] Add erase for experimental::dynamic_map #577

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/cuco/detail/dynamic_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ void dynamic_map<Key, Value, Scope, Allocator>::reserve(std::size_t n, cudaStrea
{
int64_t num_elements_remaining = n;
uint32_t submap_idx = 0;

while (num_elements_remaining > 0) {
std::size_t submap_capacity;

Expand Down Expand Up @@ -130,7 +131,6 @@ void dynamic_map<Key, Value, Scope, Allocator>::insert(
auto constexpr tile_size = 4;

std::size_t num_to_insert = std::distance(first, last);

reserve(size_ + num_to_insert, stream);

uint32_t submap_idx = 0;
Expand Down
122 changes: 109 additions & 13 deletions include/cuco/detail/dynamic_map/dynamic_map.inl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <algorithm>
#include <cstddef>
#include <iostream>

namespace cuco {
namespace experimental {
Expand Down Expand Up @@ -67,6 +68,46 @@ constexpr dynamic_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
stream));
}

// Erase-enabled constructor: takes an additional `erased_key_sentinel` and seeds the map with a
// single submap that is built with that sentinel.
template <typename Key,
          typename T,
          typename Extent,
          cuda::thread_scope Scope,
          typename KeyEqual,
          typename ProbingScheme,
          typename Allocator,
          typename Storage>
constexpr dynamic_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::
  dynamic_map(Extent initial_capacity,
              empty_key<Key> empty_key_sentinel,
              empty_value<T> empty_value_sentinel,
              erased_key<Key> erased_key_sentinel,
              KeyEqual const& pred,
              ProbingScheme const& probing_scheme,
              cuda_thread_scope<Scope> scope,
              Storage storage,
              Allocator const& alloc,
              cuda::stream_ref stream)
  : size_{0},
    capacity_{initial_capacity},
    min_insert_size_{static_cast<size_type>(1E4)},
    max_load_factor_{0.60},
    alloc_{alloc}
{
  // Concrete type of every submap owned by this dynamic map.
  using submap_type =
    cuco::static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>;

  // The first submap receives all user-supplied arguments, including the erased-key sentinel.
  submaps_.push_back(std::make_unique<submap_type>(initial_capacity,
                                                   empty_key_sentinel,
                                                   empty_value_sentinel,
                                                   erased_key_sentinel,
                                                   pred,
                                                   probing_scheme,
                                                   scope,
                                                   storage,
                                                   alloc,
                                                   stream));
}

template <typename Key,
typename T,
typename Extent,
Expand All @@ -80,18 +121,18 @@ void dynamic_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stor
InputIt first, InputIt last, cuda::stream_ref stream)
{
auto num_to_insert = cuco::detail::distance(first, last);

this->reserve(size_ + num_to_insert, stream);

uint32_t submap_idx = 0;
while (num_to_insert > 0) {
while (num_to_insert > 0 && submap_idx < submaps_.size()) {
auto& cur = submaps_[submap_idx];

auto capacity_remaining = max_load_factor_ * cur->capacity() - cur->size();
// If we are trying to insert some of the remaining keys into this submap, we can insert
// only if we meet the minimum insert size.
if (capacity_remaining >= min_insert_size_) {
auto const n = std::min(static_cast<detail::index_type>(capacity_remaining), num_to_insert);

std::size_t h_num_successes = cur->insert(first, first + n, stream);

size_ += h_num_successes;
Expand Down Expand Up @@ -126,21 +167,43 @@ void dynamic_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stor
else {
empty_key<Key> empty_key_sentinel{submaps_.front()->empty_key_sentinel()};
empty_value<T> empty_value_sentinel{submaps_.front()->empty_value_sentinel()};

erased_key<Key> erased_key_sentinel{submaps_.front()->erased_key_sentinel()};
if (erased_key_sentinel != empty_key_sentinel) {
submaps_.push_back(std::make_unique<map_type>(submap_capacity,
empty_key_sentinel,
empty_value_sentinel,
erased_key_sentinel,
KeyEqual{},
ProbingScheme{},
cuda_thread_scope<Scope>{},
Storage{},
alloc_,
stream));
} else {
submaps_.push_back(std::make_unique<map_type>(submap_capacity,
empty_key_sentinel,
empty_value_sentinel,
KeyEqual{},
ProbingScheme{},
cuda_thread_scope<Scope>{},
Storage{},
alloc_,
stream));
}
submap_capacity = capacity_;
submaps_.push_back(std::make_unique<map_type>(submap_capacity,
empty_key_sentinel,
empty_value_sentinel,
KeyEqual{},
ProbingScheme{},
cuda_thread_scope<Scope>{},
Storage{},
alloc_,
stream));
capacity_ *= 2;
}

num_elements_remaining -= max_load_factor_ * submap_capacity - min_insert_size_;
// Do the subtraction in a signed type so that std::clamp below can floor the result at zero
std::ptrdiff_t remaining =
static_cast<std::ptrdiff_t>(num_elements_remaining) -
static_cast<std::ptrdiff_t>(max_load_factor_ * submap_capacity - min_insert_size_);

// size_type is unsigned long, so the subtraction could wrap around; clamp the signed result
// to [0, num_elements_remaining] before converting back
num_elements_remaining =
static_cast<size_type>(std::clamp(remaining,
static_cast<std::ptrdiff_t>(0),
static_cast<std::ptrdiff_t>(num_elements_remaining)));
submap_idx++;
}
}
Expand Down Expand Up @@ -176,5 +239,38 @@ void dynamic_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stor
}
}

// Erases every key in `[first, last)` from the map.
//
// A key may live in any submap, so the complete key range is offered to each non-empty submap
// rather than being sliced up by submap size. `size_` is reduced by the number of keys that
// were actually removed, measured as the drop in each submap's size around the erase call.
//
// first  Beginning of the sequence of keys
// last   End of the sequence of keys
// stream Stream used for the size queries and the erase calls
template <typename Key,
          typename T,
          typename Extent,
          cuda::thread_scope Scope,
          typename KeyEqual,
          typename ProbingScheme,
          typename Allocator,
          typename Storage>
template <typename InputIt>
void dynamic_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Storage>::erase(
  InputIt first, InputIt last, cuda::stream_ref stream)
{
  auto const num_keys = cuco::detail::distance(first, last);
  // Nothing to erase: leave the map untouched.
  if (num_keys <= 0) { return; }

  for (auto const& submap : submaps_) {
    auto const size_before = submap->size(stream);
    // An empty submap cannot contain any of the keys.
    if (size_before == 0) { continue; }

    submap->erase(first, last, stream);

    // Only keys that were present in this submap reduce the overall element count.
    auto const size_after = submap->size(stream);
    size_ -= size_before - size_after;
  }
}

} // namespace experimental
} // namespace cuco
75 changes: 75 additions & 0 deletions include/cuco/dynamic_map.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,38 @@ class dynamic_map {
Allocator const& alloc = {},
cuda::stream_ref stream = {});

/**
* @brief Constructs a dynamically-sized map with erase capability.
*
* The capacity of the map will automatically increase as the user adds key/value pairs using
* `insert`.
*
* Capacity increases by a factor of growth_factor each time the size of the map exceeds a
* threshold occupancy. The performance of `find` and `contains` gradually decreases each time the
* map's capacity grows.
*
* @param initial_capacity The initial number of slots in the map
* @param empty_key_sentinel The reserved key value for empty slots
* @param empty_value_sentinel The reserved mapped value for empty slots
* @param erased_key_sentinel The reserved key value for erased slots
* @param pred Key equality binary predicate
* @param probing_scheme Probing scheme
* @param scope The scope in which operations will be performed
* @param storage Kind of storage to use
* @param alloc Allocator used for allocating device storage
* @param stream CUDA stream used to initialize the map
*/
constexpr dynamic_map(Extent initial_capacity,
empty_key<Key> empty_key_sentinel,
empty_value<T> empty_value_sentinel,
erased_key<Key> erased_key_sentinel,
KeyEqual const& pred = {},
ProbingScheme const& probing_scheme = {},
cuda_thread_scope<Scope> scope = {},
Storage storage = {},
Allocator const& alloc = {},
cuda::stream_ref stream = {});

/**
* @brief Grows the capacity of the map so there is enough space for `n` key/value pairs.
*
Expand Down Expand Up @@ -157,6 +189,49 @@ class dynamic_map {
OutputIt output_begin,
cuda::stream_ref stream = {}) const;

/**
* @brief Erases keys in the range `[first, last)`.
*
* For each key `k` in `[first, last)`, if `contains(k) == true`, removes `k` and its
* associated value from the map. Else, no effect.
*
* Side-effects:
* - `contains(k) == false`
* - `find(k) == end()`
* - `insert({k,v}) == true`
* - `get_size()` is reduced by the total number of erased keys
*
* This function synchronizes `stream`.
*
* Keep in mind that `erase` does not cause the map to shrink its memory allocation.
*
* @tparam InputIt Device accessible input iterator whose `value_type` is
* convertible to the map's `key_type`
*
* @param first Beginning of the sequence of keys
* @param last End of the sequence of keys
* @param stream Stream used for executing the kernels
*
* @throw std::runtime_error if a unique erased key sentinel value was not
* provided at construction
*/
template <typename InputIt>
void erase(InputIt first, InputIt last, cuda::stream_ref stream = {});

/**
 * @brief Reports the number of keys currently stored in the map.
 *
 * @return The current number of elements in the map
 */
size_type get_size() const noexcept
{
  return size_;
}

/**
 * @brief Reports the maximum number of elements the hash map can hold.
 *
 * @return The maximum number of elements the hash map can hold
 */
size_type get_capacity() const noexcept
{
  return capacity_;
}

private:
size_type size_{}; ///< Number of keys in the map
size_type capacity_{}; ///< Maximum number of keys that can be inserted
Expand Down
3 changes: 2 additions & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ ConfigureTest(STATIC_MAP_TEST
ConfigureTest(DYNAMIC_MAP_TEST
dynamic_map/unique_sequence_test.cu
dynamic_map/unique_sequence_test_experimental.cu
dynamic_map/erase_test.cu)
dynamic_map/erase_test.cu
dynamic_map/erase_test_experimental.cu)

###################################################################################################
# - static_multiset tests -------------------------------------------------------------------------
Expand Down
Loading