diff --git a/roaring/src/bitmap/serialization.rs b/roaring/src/bitmap/serialization.rs
index f38f89f6..b618866a 100644
--- a/roaring/src/bitmap/serialization.rs
+++ b/roaring/src/bitmap/serialization.rs
@@ -413,11 +413,17 @@ mod test {
 
         assert_eq!(rb.min(), Some(CONTAINER_OFFSET + 8));
 
-        // Ensure we can set the last byte
+        // Ensure we can set the last byte in an array container
         let bytes = [0x80];
         let rb = RoaringBitmap::from_bitmap_bytes(0xFFFFFFF8, &bytes);
         assert_eq!(rb.len(), 1);
         assert!(rb.contains(u32::MAX));
+
+        // Ensure we can set the last byte in a bitmap container
+        let bytes = vec![0xFF; 0x1_0000 / 8];
+        let rb = RoaringBitmap::from_bitmap_bytes(0xFFFF0000, &bytes);
+        assert_eq!(rb.len(), 0x1_0000);
+        assert!(rb.contains(u32::MAX));
     }
 
     #[test]
diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs
index 7151f74d..f2084092 100644
--- a/roaring/src/bitmap/store/bitmap_store.rs
+++ b/roaring/src/bitmap/store/bitmap_store.rs
@@ -38,27 +38,41 @@ impl BitmapStore {
     }
 
     pub fn from_lsb0_bytes_unchecked(bytes: &[u8], byte_offset: usize, bits_set: u64) -> Self {
-        assert!(byte_offset + bytes.len() <= BITMAP_LENGTH * size_of::<u64>());
-
-        let mut bits = Box::new([0u64; BITMAP_LENGTH]);
-        // Safety: It's safe to reinterpret u64s as u8s because u8 has less alignment requirements,
-        // and has no padding/uninitialized data.
-        let dst = unsafe {
-            std::slice::from_raw_parts_mut(
-                bits.as_mut_ptr().cast::<u8>(),
-                BITMAP_LENGTH * size_of::<u64>(),
-            )
+        const BITMAP_BYTES: usize = BITMAP_LENGTH * size_of::<u64>();
+        assert!(byte_offset.checked_add(bytes.len()).map_or(false, |sum| sum <= BITMAP_BYTES));
+
+        // If we know we're writing the full bitmap, we can avoid the initial memset to 0
+        let mut bits = if bytes.len() == BITMAP_BYTES {
+            debug_assert_eq!(byte_offset, 0); // Must be true from the above assert
+
+            // Safety: We've checked that the length is correct, and we use an unaligned load in case
+            // the bytes are not 8 byte aligned.
+            // The optimizer can see through this, and avoid the double copy to copy directly into
+            // the allocated box from bytes with memcpy
+            let bytes_as_words =
+                unsafe { bytes.as_ptr().cast::<[u64; BITMAP_LENGTH]>().read_unaligned() };
+            Box::new(bytes_as_words)
+        } else {
+            let mut bits = Box::new([0u64; BITMAP_LENGTH]);
+            // Safety: It's safe to reinterpret u64s as u8s because u8 has less alignment requirements,
+            // and has no padding/uninitialized data.
+            let dst = unsafe {
+                std::slice::from_raw_parts_mut(bits.as_mut_ptr().cast::<u8>(), BITMAP_BYTES)
+            };
+            let dst = &mut dst[byte_offset..][..bytes.len()];
+            dst.copy_from_slice(bytes);
+            bits
         };
-        let dst = &mut dst[byte_offset..][..bytes.len()];
-        dst.copy_from_slice(bytes);
 
-        let start_word = byte_offset / size_of::<u64>();
-        let end_word = (byte_offset + bytes.len() + (size_of::<u64>() - 1)) / size_of::<u64>();
+        if !cfg!(target_endian = "little") {
+            // Convert all words we touched (even partially) to little-endian
+            let start_word = byte_offset / size_of::<u64>();
+            let end_word = (byte_offset + bytes.len() + (size_of::<u64>() - 1)) / size_of::<u64>();
 
-        // The 0th byte is the least significant byte, so we've written the bytes in little-endian
-        // order, convert to native endian. Expect this to get optimized away for little-endian.
-        for word in &mut bits[start_word..end_word] {
-            *word = u64::from_le(*word);
+            // The 0th byte is the least significant byte, so we've written the bytes in little-endian
+            for word in &mut bits[start_word..end_word] {
+                *word = u64::from_le(*word);
+            }
         }
 
         Self::from_unchecked(bits_set, bits)
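Note (not part of the diff above): a minimal sketch of the LSB0 convention the endianness branch relies on. Copying bytes verbatim into a u64 word and then applying u64::from_le is equivalent to u64::from_le_bytes, so byte 0 always lands in the least significant position; on little-endian targets the fix-up is a no-op, which is why the diff can guard it with cfg!(target_endian = "little"). The test below is purely illustrative and not part of the PR:

    #[test]
    fn lsb0_word_layout_sketch() {
        // Hypothetical example: byte 0 carries bits 0..8, byte 7 carries bits 56..64.
        let bytes: [u8; 8] = [0x01, 0, 0, 0, 0, 0, 0, 0x80];
        // Raw copy of bytes into a word (what copy_from_slice / read_unaligned do),
        // followed by the same fix-up the diff applies.
        let word = u64::from_le(u64::from_ne_bytes(bytes));
        assert_eq!(word, u64::from_le_bytes(bytes)); // identical on every target
        assert_eq!(word & 1, 1); // bit 0 comes from byte 0
        assert_eq!(word >> 63, 1); // bit 63 comes from byte 7
    }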