diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index 06687e27..34f32ab1 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -139,7 +139,7 @@ checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" [[package]] name = "roaring" -version = "0.11.0" +version = "0.11.1" dependencies = [ "bytemuck", "byteorder", diff --git a/fuzz/fuzz_targets/against_croaring.rs b/fuzz/fuzz_targets/against_croaring.rs index 2d6571af..5652c751 100644 --- a/fuzz/fuzz_targets/against_croaring.rs +++ b/fuzz/fuzz_targets/against_croaring.rs @@ -5,11 +5,12 @@ mod arbitrary_ops; use libfuzzer_sys::arbitrary::{self, Arbitrary}; use libfuzzer_sys::fuzz_target; -use crate::arbitrary_ops::{check_equal, Operation}; +use crate::arbitrary_ops::{check_equal, BitmapIteratorOperation, CRoaringIterRange, Operation}; #[derive(Arbitrary, Debug)] struct FuzzInput<'a> { ops: Vec, + iter_ops: Vec, initial_input: &'a [u8], } @@ -35,6 +36,14 @@ fuzz_target!(|input: FuzzInput| { } lhs_r.internal_validate().unwrap(); rhs_r.internal_validate().unwrap(); + + let mut lhs_c_iter = CRoaringIterRange::new(&lhs_c); + let mut lhs_r_iter = lhs_r.iter(); + + for op in input.iter_ops { + op.apply(&mut lhs_c_iter, &mut lhs_r_iter); + } + check_equal(&lhs_c, &lhs_r); check_equal(&rhs_c, &rhs_r); }); diff --git a/fuzz/fuzz_targets/arbitrary_ops/mod.rs b/fuzz/fuzz_targets/arbitrary_ops/mod.rs index 03a00321..423b1b6e 100644 --- a/fuzz/fuzz_targets/arbitrary_ops/mod.rs +++ b/fuzz/fuzz_targets/arbitrary_ops/mod.rs @@ -95,6 +95,16 @@ pub enum BitmapBinaryOperation { AndNot, } +#[derive(Arbitrary, Debug)] +pub enum BitmapIteratorOperation { + Next, + NextBack, + AdvanceTo(Num), + AdvanceBackTo(Num), + Nth(Num), + NthBack(Num), +} + impl ReadBitmapOperation { pub fn apply(&self, x: &mut croaring::Bitmap, y: &mut roaring::RoaringBitmap) { match *self { @@ -387,6 +397,104 @@ impl BitmapBinaryOperation { } } +pub struct CRoaringIterRange<'a> { + cursor: croaring::bitmap::BitmapCursor<'a>, + empty: bool, + start: u32, + end_inclusive: u32, +} + +impl<'a> CRoaringIterRange<'a> { + pub fn new(bitmap: &'a croaring::Bitmap) -> Self { + CRoaringIterRange { + cursor: bitmap.cursor(), + start: 0, + end_inclusive: u32::MAX, + empty: false, + } + } + + fn next(&mut self) -> Option { + if self.empty { + return None; + } + self.cursor.reset_at_or_after(self.start); + let res = self.cursor.current().filter(|&n| n <= self.end_inclusive); + match res { + None => self.empty = true, + Some(n) if n == self.end_inclusive => self.empty = true, + Some(n) => self.start = n + 1, + } + res + } + + fn next_back(&mut self) -> Option { + if self.empty { + return None; + } + self.cursor.reset_at_or_after(self.end_inclusive); + if self.cursor.current().is_none_or(|n| n > self.end_inclusive) { + self.cursor.move_prev(); + } + let res = self.cursor.current().filter(|&n| n >= self.start); + match res { + None => self.empty = true, + Some(n) if n == self.start => self.empty = true, + Some(n) => self.end_inclusive = n - 1, + } + res + } + + fn advance_to(&mut self, num: u32) { + self.start = self.start.max(num); + } + + fn advance_back_to(&mut self, num: u32) { + self.end_inclusive = self.end_inclusive.min(num); + } + + fn nth(&mut self, num: u32) -> Option { + for _ in 0..num { + _ = self.next(); + } + self.next() + } + + fn nth_back(&mut self, num: u32) -> Option { + for _ in 0..num { + _ = self.next_back(); + } + self.next_back() + } +} + +impl BitmapIteratorOperation { + pub fn apply(&self, x: &mut CRoaringIterRange, y: &mut roaring::bitmap::Iter) { + match *self { + BitmapIteratorOperation::Next => { + assert_eq!(x.next(), y.next()); + } + BitmapIteratorOperation::NextBack => { + assert_eq!(x.next_back(), y.next_back()); + } + BitmapIteratorOperation::AdvanceTo(n) => { + x.advance_to(n.0); + y.advance_to(n.0); + } + BitmapIteratorOperation::AdvanceBackTo(n) => { + x.advance_back_to(n.0); + y.advance_back_to(n.0); + } + BitmapIteratorOperation::Nth(n) => { + assert_eq!(x.nth(n.0), y.nth(n.0 as usize)); + } + BitmapIteratorOperation::NthBack(n) => { + assert_eq!(x.nth_back(n.0), y.nth_back(n.0 as usize)); + } + } + } +} + pub(crate) fn check_equal(c: &croaring::Bitmap, r: &roaring::RoaringBitmap) { let mut lhs = c.iter(); let mut rhs = r.iter(); diff --git a/roaring/src/bitmap/store/bitmap_store.rs b/roaring/src/bitmap/store/bitmap_store.rs index dbfdc61f..0503e655 100644 --- a/roaring/src/bitmap/store/bitmap_store.rs +++ b/roaring/src/bitmap/store/bitmap_store.rs @@ -515,6 +515,9 @@ impl> BitmapIter { } else if cmp == Ordering::Equal { self.value_back } else { + // New key is greater than original key and key_back, this iterator is now empty + self.key = self.key_back; + self.value = 0; self.value_back = 0; return; } diff --git a/roaring/src/bitmap/store/interval_store.rs b/roaring/src/bitmap/store/interval_store.rs index f4ae015e..185a719e 100644 --- a/roaring/src/bitmap/store/interval_store.rs +++ b/roaring/src/bitmap/store/interval_store.rs @@ -657,13 +657,18 @@ impl> RunIter { self.forward_offset = value; } else { self.intervals.next(); + self.forward_offset = 0; return; } - if Some(self.forward_offset as u64) - >= self.intervals.as_slice().first().map(|f| f.run_len()) - { + let only_interval = self.intervals.as_slice().len() == 1; + let total_offset = u64::from(self.forward_offset) + + if only_interval { u64::from(self.backward_offset) } else { 0 }; + if Some(total_offset) >= self.intervals.as_slice().first().map(|f| f.run_len()) { self.intervals.next(); self.forward_offset = 0; + if only_interval { + self.backward_offset = 0; + } } } @@ -672,20 +677,26 @@ impl> RunIter { self.backward_offset = value; } else { self.intervals.next_back(); + self.backward_offset = 0; return; } - if Some(self.backward_offset as u64) - >= self.intervals.as_slice().last().map(|f| f.run_len()) - { + let only_interval = self.intervals.as_slice().len() == 1; + let total_offset = u64::from(self.backward_offset) + + if only_interval { u64::from(self.forward_offset) } else { 0 }; + if Some(total_offset) >= self.intervals.as_slice().last().map(|f| f.run_len()) { self.intervals.next_back(); self.backward_offset = 0; + if only_interval { + self.forward_offset = 0; + } } } fn remaining_size(&self) -> usize { - (self.intervals.as_slice().iter().map(|f| f.run_len()).sum::() - - self.forward_offset as u64 - - self.backward_offset as u64) as usize + let total_size = self.intervals.as_slice().iter().map(|f| f.run_len()).sum::(); + let total_offset = u64::from(self.forward_offset) + u64::from(self.backward_offset); + debug_assert!(total_size >= total_offset); + total_size.saturating_sub(total_offset) as usize } /// Advance the iterator to the first value greater than or equal to `n`. @@ -708,10 +719,25 @@ impl> RunIter { if let Some(value) = index.checked_sub(1) { self.intervals.nth(value); } - self.forward_offset = n - self.intervals.as_slice().first().unwrap().start; + let first_interval = self.intervals.as_slice().first().unwrap(); + self.forward_offset = n - first_interval.start; + if self.intervals.as_slice().len() == 1 + && u64::from(self.forward_offset) + u64::from(self.backward_offset) + >= first_interval.run_len() + { + // If we are now the only interval, and we've now met the forward offset, + // consume the final interval + _ = self.intervals.next(); + self.forward_offset = 0; + self.backward_offset = 0; + } } Err(index) => { if index == self.intervals.as_slice().len() { + // Consume the whole iterator + self.intervals.nth(index); + self.forward_offset = 0; + self.backward_offset = 0; return; } if let Some(value) = index.checked_sub(1) { @@ -743,10 +769,25 @@ impl> RunIter { if let Some(value) = backward_index.checked_sub(1) { self.intervals.nth_back(value); } - self.backward_offset = self.intervals.as_slice().last().unwrap().end - n; + let last_interval = self.intervals.as_slice().last().unwrap(); + self.backward_offset = last_interval.end - n; + if self.intervals.as_slice().len() == 1 + && u64::from(self.forward_offset) + u64::from(self.backward_offset) + >= last_interval.run_len() + { + // If we are now the only interval, and we've now met the forward offset, + // consume the final interval + _ = self.intervals.next_back(); + self.forward_offset = 0; + self.backward_offset = 0; + } } Err(index) => { if index == 0 { + // Consume the whole iterator + self.intervals.nth_back(self.intervals.as_slice().len()); + self.forward_offset = 0; + self.backward_offset = 0; return; } let backward_index = self.intervals.as_slice().len() - index; @@ -778,12 +819,24 @@ impl> Iterator for RunIter { } fn nth(&mut self, n: usize) -> Option { + if n > usize::from(u16::MAX) { + // Consume the whole iterator + self.intervals.nth(self.intervals.as_slice().len()); + self.forward_offset = 0; + self.backward_offset = 0; + return None; + } if let Some(skip) = n.checked_sub(1) { let mut to_skip = skip as u64; loop { - let to_remove = (self.intervals.as_slice().first()?.run_len() - - self.forward_offset as u64) - .min(to_skip); + let full_first_interval_len = self.intervals.as_slice().first()?.run_len(); + let consumed_len = u64::from(self.forward_offset) + + if self.intervals.as_slice().len() == 1 { + u64::from(self.backward_offset) + } else { + 0 + }; + let to_remove = (full_first_interval_len - consumed_len).min(to_skip); to_skip -= to_remove; self.forward_offset += to_remove as u16; self.move_next(); @@ -2401,7 +2454,7 @@ mod tests { iter.advance_to(800); assert_eq!(iter.next(), Some(800)); iter.advance_to(u16::MAX); - assert_eq!(iter.next(), Some(801)); + assert_eq!(iter.next(), None); let mut iter = interval_store.iter(); iter.advance_to(100); diff --git a/roaring/tests/iter.rs b/roaring/tests/iter.rs index 05591681..ea3d5571 100644 --- a/roaring/tests/iter.rs +++ b/roaring/tests/iter.rs @@ -210,6 +210,31 @@ fn interleaved_bitmap() { assert!(outside_in(values).eq(outside_in(bitmap))); } +#[test] +fn run_nth_max() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..0x1_0000); + let mut iter = bitmap.iter(); + assert_eq!(iter.nth(0x0_FFFF), Some(0x0_FFFF)); + assert_eq!(iter.len(), 0); + #[allow(clippy::iter_nth_zero)] + { + assert_eq!(iter.nth(0), None); + } + assert_eq!(iter.next(), None); +} + +#[test] +fn run_nth_back_max() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..0x1_0000); + let mut iter = bitmap.iter(); + assert_eq!(iter.nth_back(0x0_FFFF), Some(0)); + assert_eq!(iter.len(), 0); + assert_eq!(iter.nth_back(0), None); + assert_eq!(iter.next_back(), None); +} + proptest! { #[test] fn interleaved_iter(values in btree_set(any::(), 50_000..=100_000)) { diff --git a/roaring/tests/iter_advance_to.rs b/roaring/tests/iter_advance_to.rs index d07ca671..8e3566b3 100644 --- a/roaring/tests/iter_advance_to.rs +++ b/roaring/tests/iter_advance_to.rs @@ -206,3 +206,95 @@ fn advance_bitset_back_to_start_word() { } assert_eq!(iter.next(), None); } + +#[test] +fn advance_run_past_the_end() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..=0x35B00); + let mut iter = bitmap.iter(); + iter.advance_to(0x35B01); + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_run_back_before_start() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(500..=0x35B00); + let mut iter = bitmap.iter(); + iter.advance_back_to(499); + assert_eq!(iter.next_back(), None); +} + +#[test] +fn advance_run_back_reduces_forward_iter() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..=0x4000); + let mut iter = bitmap.iter(); + iter.advance_back_to(1); + + assert_eq!(iter.next(), Some(0)); + assert_eq!(iter.next(), Some(1)); + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_run_front_and_back_past_each_other() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..=0x4000); + let mut iter = bitmap.iter(); + iter.advance_back_to(100); + iter.advance_to(300); + assert_eq!(iter.next(), None); +} + +#[test] +fn advance_run_both_sides_past_each_other() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..0x1000); + let mut iter = bitmap.iter(); + iter.advance_back_to(100); + iter.advance_to(0xFFFF); + assert_eq!(iter.len(), 0); + assert_eq!(iter.nth_back(0), None); +} + +#[test] +fn advance_run_with_nth() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(36141..=224407); + let mut iter = bitmap.iter(); + iter.advance_back_to(101779); + assert_eq!(iter.nth(100563), None); +} + +#[test] +fn advance_to_with_next_len() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(100..0x4000); + let mut iter = bitmap.iter(); + iter.advance_back_to(100); + assert_eq!(iter.next(), Some(100)); + assert_eq!(iter.len(), 0); + assert_eq!(iter.nth_back(0), None); +} + +#[test] +fn tmp() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(196363..=262143); + let mut iter = bitmap.iter(); + assert_eq!(iter.next_back(), Some(262143)); + iter.advance_to(228960); + assert_eq!(iter.nth(36643), None); +} + +#[test] +fn advance_bitset_front_and_back_past_each_other() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert_range(0..=0x4000); + bitmap.remove_run_compression(); + let mut iter = bitmap.iter(); + iter.advance_back_to(100); + iter.advance_to(300); + assert_eq!(iter.next(), None); +}