Skip to content

Commit

Permalink
feat: implement advance_to and advance_back_to for Iter
Browse files Browse the repository at this point in the history
Implement the ability to efficiently skip ahead to a particular value.

Co-authored-by: Christian Schjølberg <[email protected]>
Co-authored-by: Matthew Herzl <[email protected]>
  • Loading branch information
3 people committed Nov 2, 2024
1 parent 07d6bc3 commit fab78b0
Show file tree
Hide file tree
Showing 5 changed files with 519 additions and 1 deletion.
10 changes: 10 additions & 0 deletions roaring/src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,16 @@ impl DoubleEndedIterator for Iter<'_> {

impl ExactSizeIterator for Iter<'_> {}

impl Iter<'_> {
    /// Advances the front of this container iterator towards `index` by
    /// delegating to the wrapped `inner` iterator.
    pub(crate) fn advance_to(&mut self, index: u16) {
        self.inner.advance_to(index)
    }

    /// Advances the back of this container iterator towards `index` by
    /// delegating to the wrapped `inner` iterator.
    pub(crate) fn advance_back_to(&mut self, index: u16) {
        self.inner.advance_back_to(index)
    }
}

impl fmt::Debug for Container {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter)
Expand Down
195 changes: 194 additions & 1 deletion roaring/src/bitmap/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use alloc::vec;
use core::iter::FusedIterator;
use core::slice;

use super::container::{self, Container};
use super::container::Container;
use super::{container, util};
use crate::{NonSortedIntegers, RoaringBitmap};

#[cfg(not(feature = "std"))]
Expand Down Expand Up @@ -31,16 +32,208 @@ fn and_then_or_clear<T, U>(opt: &mut Option<T>, f: impl FnOnce(&mut T) -> Option
x
}

/// Shared implementation of `advance_to` for the bitmap iterators: moves the
/// front of the iteration forward to the first value `>= n`.
///
/// The iterator state is passed piecewise:
/// * `front_iter` - the container iterator currently open at the front, if any,
/// * `containers` - the containers not opened yet; binary search is used on
///   them, so they are expected to be sorted by key,
/// * `back_iter` - the container iterator currently open at the back, if any.
fn advance_to_impl<'a, It>(
    n: u32,
    front_iter: &mut Option<container::Iter<'a>>,
    containers: &mut It,
    back_iter: &mut Option<container::Iter<'a>>,
) where
    It: Iterator,
    It: AsRef<[Container]>,
    It::Item: IntoIterator<IntoIter = container::Iter<'a>>,
{
    let (key, index) = util::split(n);

    // Step 1: the container that is already open at the front.
    if let Some(front) = front_iter {
        if key < front.key {
            // The front is already past the target: nothing to skip.
            return;
        }
        if key == front.key {
            front.advance_to(index);
            return;
        }
        // The target lies beyond this container, so it is entirely skipped.
        *front_iter = None;
    }

    // Step 2: the containers that have not been opened yet.
    let pending = containers.as_ref();
    let pending_len = pending.len();
    let lookup = pending.binary_search_by_key(&key, |c| c.key);
    let skipped = match lookup {
        Ok(position) => {
            // A container with exactly this key exists: open it as the new
            // front and advance inside it.
            let found = containers.nth(position).expect("binary search returned a valid index");
            let mut opened = found.into_iter();
            opened.advance_to(index);
            *front_iter = Some(opened);
            return;
        }
        // No exact match: everything before the insertion point has a
        // smaller key and must be dropped.
        Err(insertion_point) => insertion_point,
    };

    if skipped > 0 {
        containers.nth(skipped - 1);
    }
    if skipped != pending_len {
        // Some pending containers have a key greater than the one we are
        // looking for, so the target cannot be in the back iterator.
        return;
    }

    // Step 3: every pending container was consumed, only the back container
    // (if any) can still hold values.
    if let Some(back) = back_iter {
        if key == back.key {
            back.advance_to(index);
        } else if key > back.key {
            *back_iter = None;
        }
    }
}

/// Shared implementation of `advance_back_to` for the bitmap iterators: moves
/// the back of the iteration backward to the first value `<= n`.
///
/// The iterator state is passed piecewise:
/// * `front_iter` - the container iterator currently open at the front, if any,
/// * `containers` - the containers not opened yet; binary search is used on
///   them, so they are expected to be sorted by key,
/// * `back_iter` - the container iterator currently open at the back, if any.
fn advance_back_to_impl<'a, It>(
    n: u32,
    front_iter: &mut Option<container::Iter<'a>>,
    containers: &mut It,
    back_iter: &mut Option<container::Iter<'a>>,
) where
    It: DoubleEndedIterator,
    It: AsRef<[Container]>,
    It::Item: IntoIterator<IntoIter = container::Iter<'a>>,
{
    let (key, index) = util::split(n);

    // Step 1: the container that is already open at the back.
    if let Some(back) = back_iter {
        if key > back.key {
            // The back is already before the target: nothing to skip.
            return;
        }
        if key == back.key {
            back.advance_back_to(index);
            return;
        }
        // The target lies before this container, so it is entirely skipped.
        *back_iter = None;
    }

    // Step 2: the containers that have not been opened yet.
    let pending = containers.as_ref();
    let pending_len = pending.len();
    let lookup = pending.binary_search_by_key(&key, |c| c.key);
    let skipped = match lookup {
        Ok(position) => {
            // `position` is a valid index, hence strictly less than
            // `pending_len`, so this subtraction cannot underflow.
            let from_back = pending_len - 1 - position;
            let found =
                containers.nth_back(from_back).expect("binary search returned a valid index");
            let mut opened = found.into_iter();
            opened.advance_back_to(index);
            *back_iter = Some(opened);
            return;
        }
        // No exact match: everything from the insertion point onwards has a
        // greater key and must be dropped from the back.
        Err(insertion_point) => pending_len - insertion_point,
    };

    if skipped > 0 {
        containers.nth_back(skipped - 1);
    }
    if skipped != pending_len {
        // Some pending containers have a key less than the one we are
        // looking for, so the target cannot be in the front iterator.
        return;
    }

    // Step 3: every pending container was consumed, only the front container
    // (if any) can still hold values.
    if let Some(front) = front_iter {
        if key == front.key {
            front.advance_back_to(index);
        } else if key < front.key {
            *front_iter = None;
        }
    }
}

impl Iter<'_> {
    /// Builds an iterator over `containers` with no container opened yet at
    /// either end.
    fn new(containers: &[Container]) -> Iter {
        let containers = containers.iter();
        Iter { front: None, back: None, containers }
    }

    /// Moves the front of the iterator forward so that the next item returned
    /// by `next` is the first remaining value that is >= `n`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use roaring::RoaringBitmap;
    /// use core::iter::FromIterator;
    ///
    /// let bitmap = (1..3).collect::<RoaringBitmap>();
    /// let mut iter = bitmap.iter();
    /// iter.advance_to(2);
    ///
    /// assert_eq!(iter.next(), Some(2));
    /// assert_eq!(iter.next(), None);
    /// ```
    pub fn advance_to(&mut self, n: u32) {
        let Self { front, containers, back } = self;
        advance_to_impl(n, front, containers, back)
    }

    /// Moves the back of the iterator backward so that the next item returned
    /// by `next_back` is the first remaining value that is <= `n`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use roaring::RoaringBitmap;
    /// use core::iter::FromIterator;
    ///
    /// let bitmap = (1..3).collect::<RoaringBitmap>();
    /// let mut iter = bitmap.iter();
    /// iter.advance_back_to(1);
    ///
    /// assert_eq!(iter.next_back(), Some(1));
    /// assert_eq!(iter.next_back(), None);
    /// ```
    pub fn advance_back_to(&mut self, n: u32) {
        let Self { front, containers, back } = self;
        advance_back_to_impl(n, front, containers, back)
    }
}

impl IntoIter {
    /// Builds an owning iterator over `containers` with no container opened
    /// yet at either end.
    fn new(containers: Vec<Container>) -> IntoIter {
        IntoIter { front: None, containers: containers.into_iter(), back: None }
    }

    /// Advance the iterator to the first position where the item has a value >= `n`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use roaring::RoaringBitmap;
    /// use core::iter::FromIterator;
    ///
    /// let bitmap = (1..3).collect::<RoaringBitmap>();
    /// // `into_iter` (not `iter`) so that this example exercises `IntoIter`.
    /// let mut iter = bitmap.into_iter();
    /// iter.advance_to(2);
    ///
    /// assert_eq!(iter.next(), Some(2));
    /// assert_eq!(iter.next(), None);
    /// ```
    pub fn advance_to(&mut self, n: u32) {
        advance_to_impl(n, &mut self.front, &mut self.containers, &mut self.back);
    }

    /// Advance the back of the iterator to the first position where the item has a value <= `n`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use roaring::RoaringBitmap;
    /// use core::iter::FromIterator;
    ///
    /// let bitmap = (1..3).collect::<RoaringBitmap>();
    /// let mut iter = bitmap.into_iter();
    /// iter.advance_back_to(1);
    ///
    /// assert_eq!(iter.next_back(), Some(1));
    /// assert_eq!(iter.next_back(), None);
    /// ```
    pub fn advance_back_to(&mut self, n: u32) {
        advance_back_to_impl(n, &mut self.front, &mut self.containers, &mut self.back);
    }
}

fn size_hint_impl(
Expand Down
62 changes: 62 additions & 0 deletions roaring/src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::fmt::{Display, Formatter};
use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign};

Expand Down Expand Up @@ -406,6 +407,7 @@ pub struct BitmapIter<B: Borrow<[u64; BITMAP_LENGTH]>> {
key: u16,
value: u64,
key_back: u16,
// If key_back <= key, current back value is actually in `value`
value_back: u64,
bits: B,
}
Expand All @@ -420,6 +422,66 @@ impl<B: Borrow<[u64; BITMAP_LENGTH]>> BitmapIter<B> {
bits,
}
}

/// Advance the iterator to the first value greater than or equal to `index`.
///
/// Does nothing when the front is already positioned in a later word than the
/// one selected by `key(index)`. When `index` lies past the word the back of
/// the iterator is positioned on, no value `>= index` remains and the iterator
/// is left empty.
pub(crate) fn advance_to(&mut self, index: u16) {
    let new_key = key(index) as u16;
    let value = match new_key.cmp(&self.key) {
        // The front cursor is already past the target word.
        Ordering::Less => return,
        // Same word as the front cursor: keep filtering the live front word.
        Ordering::Equal => self.value,
        Ordering::Greater => {
            let bits = self.bits.borrow();
            let cmp = new_key.cmp(&self.key_back);
            // Match arms can be reordered, this ordering is perf sensitive
            if cmp == Ordering::Less {
                // SAFETY: new_key is > self.key and < self.key_back, i.e. strictly
                // between two word positions of `bits`, so it must be in bounds.
                unsafe { *bits.get_unchecked(new_key as usize) }
            } else if cmp == Ordering::Equal {
                // The front catches up with the back word; from now on the shared
                // word lives in `value` (key_back <= key).
                self.value_back
            } else {
                // The target word is past the back cursor, so every remaining
                // value is < index. Clearing `value_back` alone is not enough:
                // the bits still held in `value` must be discarded as well.
                // Falling through with an empty word stores `key = new_key`
                // (> key_back) and `value = 0`, the mirror image of the empty
                // state `advance_back_to` produces when it moves before the front.
                self.value_back = 0;
                0
            }
        }
    };
    let bit = bit(index);
    // Mask of the bits strictly below the target bit inside its word.
    let low_bits = (1 << bit) - 1;

    self.key = new_key;
    self.value = value & !low_bits;
}

/// Advance the back of the iterator to the first value (seen from the back) less than or
/// equal to `index`.
///
/// Does nothing when the back is already positioned in an earlier word than the one
/// selected by `key(index)`.
pub(crate) fn advance_back_to(&mut self, index: u16) {
let new_key = key(index) as u16;
// `value` is the word to keep (before masking), `dst` the field it is written back to.
let (value, dst) = match new_key.cmp(&self.key_back) {
// The back cursor is already before the target word.
Ordering::Greater => return,
Ordering::Equal => {
// Same word as the back cursor. When both cursors share a word
// (key_back <= key) the live bits are held in `value`, not `value_back`.
let dst =
if self.key_back <= self.key { &mut self.value } else { &mut self.value_back };
(*dst, dst)
}
Ordering::Less => {
let bits = self.bits.borrow();
let cmp = new_key.cmp(&self.key);
// Match arms can be reordered, this ordering is perf sensitive
if cmp == Ordering::Greater {
// SAFETY: new_key is > self.key, < self.key_back, so it must be in bounds
let value = unsafe { *bits.get_unchecked(new_key as usize) };
(value, &mut self.value_back)
} else if cmp == Ordering::Equal {
// The back catches up with the front word, which lives in `value`.
(self.value, &mut self.value)
} else {
// The target word is before the front cursor: an empty word is stored in
// `value` and `key_back` ends up below `key`.
// NOTE(review): presumably `next`/`next_back` treat that state as exhausted;
// they are not visible here, confirm.
(0, &mut self.value)
}
}
};
let bit = bit(index);
// Mask keeping bits 0..=bit of the word, i.e. the values <= index inside it.
let low_bits = u64::MAX >> (64 - bit - 1);

self.key_back = new_key;
*dst = value & low_bits;
}
}

impl<B: Borrow<[u64; BITMAP_LENGTH]>> Iterator for BitmapIter<B> {
Expand Down
45 changes: 45 additions & 0 deletions roaring/src/bitmap/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,51 @@ impl PartialEq for Store {
}
}

impl Iter<'_> {
    /// Advance the iterator to the first value greater than or equal to `n`.
    pub(crate) fn advance_to(&mut self, n: u16) {
        /// Drops the first `count` items of `iter`.
        fn discard_front<I: Iterator>(iter: &mut I, count: usize) {
            if count > 0 {
                iter.nth(count - 1);
            }
        }

        match self {
            Iter::BitmapBorrowed(inner) => inner.advance_to(n),
            Iter::BitmapOwned(inner) => inner.advance_to(n),
            Iter::Array(inner) => {
                // Number of leading values that are strictly below `n`.
                let below = inner.as_slice().partition_point(|&value| value < n);
                discard_front(inner, below);
            }
            Iter::Vec(inner) => {
                // Number of leading values that are strictly below `n`.
                let below = inner.as_slice().partition_point(|&value| value < n);
                discard_front(inner, below);
            }
        }
    }

    /// Advance the back of the iterator to the first value (seen from the
    /// back) less than or equal to `n`.
    pub(crate) fn advance_back_to(&mut self, n: u16) {
        /// Drops the last `count` items of `iter`.
        fn discard_back<I: DoubleEndedIterator>(iter: &mut I, count: usize) {
            if count > 0 {
                iter.nth_back(count - 1);
            }
        }

        match self {
            Iter::BitmapBorrowed(inner) => inner.advance_back_to(n),
            Iter::BitmapOwned(inner) => inner.advance_back_to(n),
            Iter::Array(inner) => {
                let remaining = inner.as_slice();
                // Values `<= n` form a prefix; everything after it is above `n`.
                let kept = remaining.partition_point(|&value| value <= n);
                let above = remaining.len() - kept;
                discard_back(inner, above);
            }
            Iter::Vec(inner) => {
                let remaining = inner.as_slice();
                // Values `<= n` form a prefix; everything after it is above `n`.
                let kept = remaining.partition_point(|&value| value <= n);
                let above = remaining.len() - kept;
                discard_back(inner, above);
            }
        }
    }
}

impl Iterator for Iter<'_> {
type Item = u16;

Expand Down
Loading

0 comments on commit fab78b0

Please sign in to comment.