Report errors when usize overflows.
This now makes the crate safe to use on 16-bit systems, with one small
exception in V2DeflateSerializer.

This necessitated adding a few error types to operations that used to
overflow. Various other small cleanups were performed while following
the logic around.

Fixes HdrHistogram#17.
Marshall Pierce committed Apr 13, 2017
1 parent d17f12d commit 010f677
Showing 9 changed files with 195 additions and 108 deletions.
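The pattern behind most of these changes is to replace silently wrapping (or debug-panicking) `usize` arithmetic with checked operations whose failure is surfaced to the caller. A minimal sketch of that pattern, using a hypothetical error type rather than the crate's actual error enums:

```rust
// Hypothetical sketch of the checked-arithmetic pattern used throughout this
// commit; the error type and function are illustrative, not the crate's API.
#[derive(Debug, PartialEq)]
enum SketchError {
    UsizeOverflow,
}

/// Compute header length + payload length without wrapping on a 16-bit `usize`.
fn total_len(header_len: usize, payload_len: usize) -> Result<usize, SketchError> {
    header_len
        .checked_add(payload_len)
        .ok_or(SketchError::UsizeOverflow)
}

fn main() {
    assert_eq!(Ok(12), total_len(8, 4));
    // The caller now sees an error instead of a wrapped value or a panic.
    assert_eq!(Err(SketchError::UsizeOverflow), total_len(usize::MAX, 1));
}
```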
4 changes: 2 additions & 2 deletions src/iterators/mod.rs
@@ -19,9 +19,9 @@ pub mod all;
/// A trait for designing an subset iterator over values in a `Histogram`.
pub trait PickyIterator<T: Counter> {
/// should an item be yielded for the given index?
fn pick(&mut self, usize, u64) -> bool;
fn pick(&mut self, index: usize, total_count_to_index: u64) -> bool;
/// should we keep iterating even though all future indices are zeros?
fn more(&mut self, usize) -> bool;
fn more(&mut self, index: usize) -> bool;
}

/// `HistogramIterator` provides a base iterator for a `Histogram`.
34 changes: 25 additions & 9 deletions src/iterators/percentile.rs
@@ -6,16 +6,18 @@ use iterators::{HistogramIterator, PickyIterator};
pub struct Iter<'a, T: 'a + Counter> {
hist: &'a Histogram<T>,

percentile_ticks_per_half_distance: isize,
percentile_ticks_per_half_distance: u32,
percentile_level_to_iterate_to: f64,
reached_last_recorded_value: bool,
}

impl<'a, T: 'a + Counter> Iter<'a, T> {
/// Construct a new percentile iterator. See `Histogram::iter_percentiles` for details.
pub fn new(hist: &'a Histogram<T>,
percentile_ticks_per_half_distance: isize)
percentile_ticks_per_half_distance: u32)
-> HistogramIterator<'a, T, Iter<'a, T>> {
assert!(percentile_ticks_per_half_distance > 0, "Ticks per half distance must be > 0");

HistogramIterator::new(hist,
Iter {
hist: hist,
@@ -44,17 +46,31 @@ impl<'a, T: 'a + Counter> PickyIterator<T> for Iter<'a, T> {
// much easier to browse through in a percentile distribution output, for example.
//
// We calculate the number of equal-sized "ticks" that the 0-100 range will be divided by
// at the current scale. The scale is detemined by the percentile level we are iterating
// at the current scale. The scale is determined by the percentile level we are iterating
// to. The following math determines the tick size for the current scale, and maintain a
// fixed tick size for the remaining "half the distance to 100%" [from either 0% or from
// the previous half-distance]. When that half-distance is crossed, the scale changes and
// the tick size is effectively cut in half.

let percentile_reporting_ticks =
self.percentile_ticks_per_half_distance *
2_f64.powi(((100.0 / (100.0 - self.percentile_level_to_iterate_to)).ln() /
2_f64.ln()) as i32 + 1) as isize;
self.percentile_level_to_iterate_to += 100.0 / percentile_reporting_ticks as f64;
//
// Calculate the number of times we've halved the distance to 100%, This is 1 at 50%, 2 at
// 75%, 3 at 87.5%, etc. 2 ^ num_halvings is the number of slices that will fit into 100%.
// At 50%, num_halvings would be 1, so 2 ^ 1 would yield 2 slices, etc. At any given number
// of slices, the last slice is what we're going to traverse the first half of. With 1 total
// slice, traverse half to get to 50%. Then traverse half of the last (second) slice to get
// to 75%, etc.
// Minimum of 0 (100.0/100.0 = 1, log 2 of which is 0) so unsigned cast is safe.
let num_halvings = (100.0 / (100.0 - self.percentile_level_to_iterate_to)).log2() as u32;
// Calculate the total number of ticks in 0-100% given that half of each slice is tick'd.
// The number of slices is 2 ^ num_halvings, and each slice has two "half distances" to
// tick, so we add an extra power of two to get ticks per whole distance.
// Use u64 math so that there's less risk of overflow with large numbers of ticks and data
// that ends up needing large numbers of halvings.
// TODO calculate the worst case total_ticks and make sure we can't ever overflow here
let total_ticks = (self.percentile_ticks_per_half_distance as u64)
.checked_mul(1_u64.checked_shl(num_halvings + 1).expect("too many halvings"))
.expect("too many total ticks");
let increment_size = 100.0 / total_ticks as f64;
self.percentile_level_to_iterate_to += increment_size;
true
}
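To make the new comment concrete, here is a standalone reproduction of the tick math above (illustrative only, not the crate's code): with 5 ticks per half distance, the iterator takes 10% steps up to 50%, 5% steps up to 75%, 2.5% steps up to 87.5%, and so on.

```rust
// Standalone reproduction of the tick-size math in `pick` above; the function
// name is made up for illustration.
fn increment_size(percentile_level: f64, ticks_per_half_distance: u32) -> f64 {
    // Number of times the distance to 100% has been halved so far:
    // 0 below 50%, 1 at 50%, 2 at 75%, 3 at 87.5%, ...
    let num_halvings = (100.0 / (100.0 - percentile_level)).log2() as u32;
    // 2^num_halvings slices fit into 0-100%, and each slice is ticked over two
    // half distances, hence the extra power of two.
    let total_ticks = (ticks_per_half_distance as u64)
        .checked_mul(1_u64 << (num_halvings + 1))
        .expect("too many total ticks");
    100.0 / total_ticks as f64
}

fn main() {
    assert_eq!(10.0, increment_size(0.0, 5)); // 0% to 50%: 10% steps
    assert_eq!(5.0, increment_size(50.0, 5)); // 50% to 75%: 5% steps
    assert_eq!(2.5, increment_size(75.0, 5)); // 75% to 87.5%: 2.5% steps
    assert_eq!(1.25, increment_size(87.5, 5)); // and so on
}
```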

222 changes: 143 additions & 79 deletions src/lib.rs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/serialization/deserializer.rs
@@ -114,6 +114,7 @@ impl Deserializer {
// Read with fast loop until we are within 9 of the end. Fast loop can't handle EOF,
// so bail to slow version for the last few bytes.

// payload_index math is safe because payload_len is a usize
let (zz_num, bytes_read) = varint_read_slice(
&payload_slice[payload_index..(payload_index + 9)]);
payload_index += bytes_read;
5 changes: 4 additions & 1 deletion src/serialization/v2_deflate_serializer.rs
@@ -70,6 +70,9 @@ impl V2DeflateSerializer {

// TODO pluggable compressors? configurable compression levels?
// TODO benchmark https://github.com/sile/libflate
// TODO if uncompressed_len is near the limit of 16-bit usize, and compression grows the
// data instead of shrinking it (which we cannot really predict), writing to compressed_buf
// could panic as Vec overflows its internal `usize`.

{
// TODO reuse deflate buf, or switch to lower-level flate2::Compress
@@ -80,7 +83,7 @@

// fill in length placeholder. Won't underflow since length is always at least 8, and won't
// overflow u32 as the largest array is about 6 million entries, so about 54MiB encoded (if
// counter is u64)
// counter is u64).
let total_compressed_len = self.compressed_buf.len();
(&mut self.compressed_buf[4..8]).write_u32::<BigEndian>((total_compressed_len as u32) - 8)?;
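The length-placeholder fill above follows a common framing pattern: reserve the header up front, write the body, then backfill the length once it is known. A rough sketch under the assumption that bytes 4..8 of an 8-byte header carry the big-endian length of everything after the header (the leading cookie bytes are simply left zeroed here); it uses the same `byteorder` calls as the code above but is not the crate's implementation:

```rust
use byteorder::{BigEndian, WriteBytesExt};

// Sketch of the length-placeholder pattern: an 8-byte header is reserved up
// front and bytes 4..8 are backfilled with the length of what follows it.
fn frame_payload(payload: &[u8]) -> std::io::Result<Vec<u8>> {
    let mut buf = vec![0u8; 8]; // cookie (left zeroed here) + length placeholder
    buf.extend_from_slice(payload);

    let total_len = buf.len();
    // Won't underflow: total_len is at least the 8 header bytes we reserved.
    // Fits in u32 for any realistically sized serialized histogram.
    (&mut buf[4..8]).write_u32::<BigEndian>((total_len as u32) - 8)?;
    Ok(buf)
}
```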

11 changes: 7 additions & 4 deletions src/serialization/v2_serializer.rs
@@ -68,6 +68,7 @@ impl V2Serializer {
}

let counts_len = encode_counts(h, &mut self.buf[V2_HEADER_SIZE..])?;
// addition should be safe as max_size is already a usize
let total_len = V2_HEADER_SIZE + counts_len;

// TODO benchmark fastest buffer management scheme
@@ -81,7 +82,8 @@
}

fn max_encoded_size<T: Counter>(h: &Histogram<T>) -> Option<usize> {
counts_array_max_encoded_size(h.index_for(h.max()) + 1)
h.index_for(h.max())
.and_then(|i| counts_array_max_encoded_size(i + 1))
.and_then(|x| x.checked_add(V2_HEADER_SIZE))
}

@@ -98,13 +100,13 @@ pub fn counts_array_max_encoded_size(length: usize) -> Option<usize> {
/// Encode counts array into slice.
/// The slice must be at least 9 * the number of counts that will be encoded.
pub fn encode_counts<T: Counter>(h: &Histogram<T>, buf: &mut [u8]) -> Result<usize, V2SerializeError> {
let index_limit = h.index_for(h.max()) + 1;
let index_limit = h.index_for(h.max()).expect("Index for max value must exist");
let mut index = 0;
let mut bytes_written = 0;

assert!(index_limit <= h.counts.len());

while index < index_limit {
while index <= index_limit {
// index is inside h.counts because of the assert above
let count = unsafe { *(h.counts.get_unchecked(index)) };
index += 1;
@@ -117,7 +119,7 @@ pub fn encode_counts<T: Counter>(h: &Histogram<T>, buf: &mut [u8]) -> Result<usi
zero_count = 1;

// index is inside h.counts because of the assert above
while (index < index_limit) && (unsafe { *(h.counts.get_unchecked(index)) } == T::zero()) {
while (index <= index_limit) && (unsafe { *(h.counts.get_unchecked(index)) } == T::zero()) {
zero_count += 1;
index += 1;
}
@@ -137,6 +139,7 @@ pub fn encode_counts<T: Counter>(h: &Histogram<T>, buf: &mut [u8]) -> Result<usi

let zz = zig_zag_encode(count_or_zeros);

// this can't be longer than the length of `buf`, so this won't overflow `usize`
bytes_written += varint_write(zz, &mut buf[bytes_written..]);
}
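For reference, the zig-zag step above exists so that zero runs, which the surrounding code appears to encode as negative values, stay short when varint encoded. A minimal sketch of the textbook i64 zig-zag transform (the crate has its own `zig_zag_encode`; this is just the standard formula, shown for illustration):

```rust
// Textbook zig-zag transform: 0, -1, 1, -2, 2, ... map to 0, 1, 2, 3, 4, ...
// so small-magnitude negatives become small unsigned values and thus short
// LEB128-style varints.
fn zig_zag(n: i64) -> u64 {
    ((n << 1) ^ (n >> 63)) as u64
}

fn main() {
    assert_eq!(0, zig_zag(0));
    assert_eq!(1, zig_zag(-1)); // e.g. a run of one zero count
    assert_eq!(2, zig_zag(1));
    assert_eq!(7, zig_zag(-4)); // e.g. a run of four zero counts
}
```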

16 changes: 8 additions & 8 deletions src/tests/index_calculation.rs
@@ -328,44 +328,44 @@ fn sub_bucket_for_value_above_biggest_still_works() {
#[test]
fn index_for_first_bucket_first_entry() {
let h = histo64(1, 100_000, 3);
assert_eq!(0, h.index_for(0));
assert_eq!(0, h.index_for(0).unwrap());
}

#[test]
fn index_for_first_bucket_first_distinguishable_entry() {
let h = histo64(1, 100_000, 3);
assert_eq!(1, h.index_for(1));
assert_eq!(1, h.index_for(1).unwrap());
}

#[test]
fn index_for_first_bucket_last_entry() {
let h = histo64(1, 100_000, 3);
assert_eq!(2047, h.index_for(2047));
assert_eq!(2047, h.index_for(2047).unwrap());
}

#[test]
fn index_for_second_bucket_last_entry() {
let h = histo64(1, 100_000, 3);
assert_eq!(2048 + 1023, h.index_for(2048 + 2047));
assert_eq!(2048 + 1023, h.index_for(2048 + 2047).unwrap());
}

#[test]
fn index_for_second_bucket_last_entry_indistinguishable() {
let h = histo64(1, 100_000, 3);
assert_eq!(2048 + 1023, h.index_for(2048 + 2046));
assert_eq!(2048 + 1023, h.index_for(2048 + 2046).unwrap());
}

#[test]
fn index_for_second_bucket_first_entry() {
let h = histo64(1, 100_000, 3);
assert_eq!(2048, h.index_for(2048));
assert_eq!(2048, h.index_for(2048).unwrap());
}

#[test]
fn index_for_below_smallest() {
let h = histo64(1024, 100_000, 3);

assert_eq!(0, h.index_for(512));
assert_eq!(0, h.index_for(512).unwrap());
}

#[test]
@@ -377,5 +377,5 @@ fn index_for_way_past_largest_value_exceeds_length() {

// 2^39 = 1024 * 2^29, so this should be the start of the 30th bucket.
// Start index is (bucket index + 1) * 1024.
assert_eq!(1024 * (30 + 1), h.index_for(1 << 40));
assert_eq!(1024 * (30 + 1), h.index_for(1 << 40).unwrap());
}
6 changes: 3 additions & 3 deletions tests/data_access.rs
@@ -201,9 +201,9 @@ fn large_percentile() {
#[test]
fn percentile_atorbelow() {
let Loaded { hist, raw, .. } = load_histograms();
assert_near!(99.99, raw.percentile_below(5000), 0.0001);
assert_near!(50.0, hist.percentile_below(5000), 0.0001);
assert_near!(100.0, hist.percentile_below(100000000_u64), 0.0001);
assert_near!(99.99, raw.percentile_below(5000).unwrap(), 0.0001);
assert_near!(50.0, hist.percentile_below(5000).unwrap(), 0.0001);
assert_near!(100.0, hist.percentile_below(100000000_u64).unwrap(), 0.0001);
}

#[test]
4 changes: 2 additions & 2 deletions tests/histogram.rs
@@ -81,7 +81,7 @@ fn empty_histogram() {
assert_eq!(h.max(), 0);
assert_near!(h.mean(), 0.0, 0.0000000000001);
assert_near!(h.stdev(), 0.0, 0.0000000000001);
assert_near!(h.percentile_below(0), 100.0, 0.0000000000001);
assert_near!(h.percentile_below(0).unwrap(), 100.0, 0.0000000000001);
assert!(verify_max(h));
}

@@ -271,7 +271,7 @@ fn subtract_subtrahend_values_outside_minuend_range_error() {
big += 1000 * TEST_VALUE_LEVEL;
big += 2 * TRACKABLE_MAX;

assert_eq!(SubtractionError::SubtrahendValuesExceedMinuendRange, h1.subtract(&big).unwrap_err());
assert_eq!(SubtractionError::SubtrahendValueExceedsMinuendRange, h1.subtract(&big).unwrap_err());

assert_min_max_count(h1);
assert_min_max_count(big);
