diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index a720c2e6..dcbb8c02 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -11,7 +11,6 @@ const ARRAY_LIMIT: u64 = 4096; #[derive(PartialEq, Clone)] pub struct Container { pub key: u16, - pub len: u64, pub store: Store, } @@ -22,14 +21,17 @@ pub struct Iter<'a> { impl Container { pub fn new(key: u16) -> Container { - Container { key, len: 0, store: Store::Array(Vec::new()) } + Container { key, store: Store::new() } } } impl Container { + pub fn len(&self) -> u64 { + self.store.len() + } + pub fn insert(&mut self, index: u16) -> bool { if self.store.insert(index) { - self.len += 1; self.ensure_correct_store(); true } else { @@ -39,7 +41,6 @@ impl Container { pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { let inserted = self.store.insert_range(range); - self.len += inserted; self.ensure_correct_store(); inserted } @@ -49,7 +50,6 @@ impl Container { /// Returns whether the `index` was effectively pushed. 
pub fn push(&mut self, index: u16) -> bool { if self.store.push(index) { - self.len += 1; self.ensure_correct_store(); true } else { @@ -59,7 +59,6 @@ impl Container { pub fn remove(&mut self, index: u16) -> bool { if self.store.remove(index) { - self.len -= 1; self.ensure_correct_store(); true } else { @@ -69,7 +68,6 @@ impl Container { pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { let result = self.store.remove_range(range); - self.len -= result; self.ensure_correct_store(); result } @@ -83,7 +81,7 @@ impl Container { } pub fn is_subset(&self, other: &Self) -> bool { - self.len <= other.len && self.store.is_subset(&other.store) + self.len() <= other.len() && self.store.is_subset(&other.store) } pub fn min(&self) -> Option { @@ -95,14 +93,18 @@ impl Container { } fn ensure_correct_store(&mut self) { - let new_store = match (&self.store, self.len) { - (store @ &Store::Bitmap(..), len) if len <= ARRAY_LIMIT => Some(store.to_array()), - (store @ &Store::Array(..), len) if len > ARRAY_LIMIT => Some(store.to_bitmap()), - _ => None, + match &self.store { + Store::Bitmap(ref bits) => { + if bits.len() <= ARRAY_LIMIT { + self.store = Store::Array(bits.to_array_store()) + } + } + Store::Array(ref vec) => { + if vec.len() as u64 > ARRAY_LIMIT { + self.store = Store::Bitmap(vec.to_bitmap_store()) + } + } }; - if let Some(new_store) = new_store { - self.store = new_store; - } } } @@ -111,7 +113,7 @@ impl BitOr<&Container> for &Container { fn bitor(self, rhs: &Container) -> Container { let store = BitOr::bitor(&self.store, &rhs.store); - let mut container = Container { key: self.key, len: store.len(), store }; + let mut container = Container { key: self.key, store }; container.ensure_correct_store(); container } @@ -120,7 +122,6 @@ impl BitOr<&Container> for &Container { impl BitOrAssign for Container { fn bitor_assign(&mut self, rhs: Container) { BitOrAssign::bitor_assign(&mut self.store, rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); 
} } @@ -128,7 +129,6 @@ impl BitOrAssign for Container { impl BitOrAssign<&Container> for Container { fn bitor_assign(&mut self, rhs: &Container) { BitOrAssign::bitor_assign(&mut self.store, &rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); } } @@ -138,7 +138,7 @@ impl BitAnd<&Container> for &Container { fn bitand(self, rhs: &Container) -> Container { let store = BitAnd::bitand(&self.store, &rhs.store); - let mut container = Container { key: self.key, len: store.len(), store }; + let mut container = Container { key: self.key, store }; container.ensure_correct_store(); container } @@ -147,7 +147,6 @@ impl BitAnd<&Container> for &Container { impl BitAndAssign for Container { fn bitand_assign(&mut self, rhs: Container) { BitAndAssign::bitand_assign(&mut self.store, rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); } } @@ -155,7 +154,6 @@ impl BitAndAssign for Container { impl BitAndAssign<&Container> for Container { fn bitand_assign(&mut self, rhs: &Container) { BitAndAssign::bitand_assign(&mut self.store, &rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); } } @@ -165,7 +163,7 @@ impl Sub<&Container> for &Container { fn sub(self, rhs: &Container) -> Container { let store = Sub::sub(&self.store, &rhs.store); - let mut container = Container { key: self.key, len: store.len(), store }; + let mut container = Container { key: self.key, store }; container.ensure_correct_store(); container } @@ -174,7 +172,6 @@ impl Sub<&Container> for &Container { impl SubAssign<&Container> for Container { fn sub_assign(&mut self, rhs: &Container) { SubAssign::sub_assign(&mut self.store, &rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); } } @@ -184,7 +181,7 @@ impl BitXor<&Container> for &Container { fn bitxor(self, rhs: &Container) -> Container { let store = BitXor::bitxor(&self.store, &rhs.store); - let mut container = Container { key: self.key, len: store.len(), store }; + let mut container = 
Container { key: self.key, store }; container.ensure_correct_store(); container } @@ -193,7 +190,6 @@ impl BitXor<&Container> for &Container { impl BitXorAssign for Container { fn bitxor_assign(&mut self, rhs: Container) { BitXorAssign::bitxor_assign(&mut self.store, rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); } } @@ -201,7 +197,6 @@ impl BitXorAssign for Container { impl BitXorAssign<&Container> for Container { fn bitxor_assign(&mut self, rhs: &Container) { BitXorAssign::bitxor_assign(&mut self.store, &rhs.store); - self.len = self.store.len(); self.ensure_correct_store(); } } @@ -236,6 +231,6 @@ impl<'a> Iterator for Iter<'a> { impl fmt::Debug for Container { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - format!("Container<{:?} @ {:?}>", self.len, self.key).fmt(formatter) + format!("Container<{:?} @ {:?}>", self.len(), self.key).fmt(formatter) } } diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index 077c39a3..964b4767 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -169,7 +169,7 @@ impl RoaringBitmap { match self.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { if self.containers[loc].remove(index) { - if self.containers[loc].len == 0 { + if self.containers[loc].len() == 0 { self.containers.remove(loc); } true @@ -214,7 +214,7 @@ impl RoaringBitmap { let a = if key == start_container_key { start_index } else { 0 }; let b = if key == end_container_key { end_index } else { u16::MAX }; removed += self.containers[index].remove_range(a..=b); - if self.containers[index].len == 0 { + if self.containers[index].len() == 0 { self.containers.remove(index); continue; } @@ -297,7 +297,7 @@ impl RoaringBitmap { /// assert_eq!(rb.len(), 2); /// ``` pub fn len(&self) -> u64 { - self.containers.iter().map(|container| container.len).sum() + self.containers.iter().map(|container| container.len()).sum() } /// Returns the minimum value in the set (if the set is non-empty). 
@@ -447,7 +447,7 @@ mod tests { let inserted = b.insert_range(u16::MAX as u32 + 1..=u16::MAX as u32 + 1); assert_eq!(inserted, 1); - assert_eq!(b.containers[0].len, 1); + assert_eq!(b.containers[0].len(), 1); assert_eq!(b.containers.len(), 1); let removed = b.remove_range(u16::MAX as u32 + 1..=u16::MAX as u32 + 1); diff --git a/src/bitmap/iter.rs b/src/bitmap/iter.rs index 3f681175..42ea4069 100644 --- a/src/bitmap/iter.rs +++ b/src/bitmap/iter.rs @@ -23,14 +23,14 @@ pub struct IntoIter { impl Iter<'_> { fn new(containers: &[Container]) -> Iter { - let size_hint = containers.iter().map(|c| c.len).sum(); + let size_hint = containers.iter().map(|c| c.len()).sum(); Iter { inner: containers.iter().flat_map(identity as _), size_hint } } } impl IntoIter { fn new(containers: Vec) -> IntoIter { - let size_hint = containers.iter().map(|c| c.len).sum(); + let size_hint = containers.iter().map(|c| c.len()).sum(); IntoIter { inner: containers.into_iter().flat_map(identity as _), size_hint } } } diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index 4e822799..54b5a951 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -276,7 +276,7 @@ impl BitAnd<&RoaringBitmap> for &RoaringBitmap { for pair in Pairs::new(&self.containers, &rhs.containers) { if let (Some(lhs), Some(rhs)) = pair { let container = BitAnd::bitand(lhs, rhs); - if container.len != 0 { + if container.len() != 0 { containers.push(container); } } @@ -301,7 +301,7 @@ impl BitAndAssign for RoaringBitmap { let rhs_cont = &mut rhs.containers[loc]; let rhs_cont = mem::replace(rhs_cont, Container::new(rhs_cont.key)); BitAndAssign::bitand_assign(cont, rhs_cont); - cont.len != 0 + cont.len() != 0 } Err(_) => false, } @@ -317,7 +317,7 @@ impl BitAndAssign<&RoaringBitmap> for RoaringBitmap { match rhs.containers.binary_search_by_key(&key, |c| c.key) { Ok(loc) => { BitAndAssign::bitand_assign(cont, &rhs.containers[loc]); - cont.len != 0 + cont.len() != 0 } Err(_) => false, } @@ -367,7 +367,7 @@ impl Sub<&RoaringBitmap> 
for &RoaringBitmap { (None, Some(_)) => (), (Some(lhs), Some(rhs)) => { let container = Sub::sub(lhs, rhs); - if container.len != 0 { + if container.len() != 0 { containers.push(container); } } @@ -393,7 +393,7 @@ impl SubAssign<&RoaringBitmap> for RoaringBitmap { match rhs.containers.binary_search_by_key(&cont.key, |c| c.key) { Ok(loc) => { SubAssign::sub_assign(cont, &rhs.containers[loc]); - cont.len != 0 + cont.len() != 0 } Err(_) => true, } @@ -443,7 +443,7 @@ impl BitXor<&RoaringBitmap> for &RoaringBitmap { (None, Some(rhs)) => containers.push(rhs.clone()), (Some(lhs), Some(rhs)) => { let container = BitXor::bitxor(lhs, rhs); - if container.len != 0 { + if container.len() != 0 { containers.push(container); } } @@ -462,7 +462,7 @@ impl BitXorAssign for RoaringBitmap { match pair { (Some(mut lhs), Some(rhs)) => { BitXorAssign::bitxor_assign(&mut lhs, rhs); - if lhs.len != 0 { + if lhs.len() != 0 { self.containers.push(lhs); } } @@ -481,7 +481,7 @@ impl BitXorAssign<&RoaringBitmap> for RoaringBitmap { match pair { (Some(mut lhs), Some(rhs)) => { BitXorAssign::bitxor_assign(&mut lhs, rhs); - if lhs.len != 0 { + if lhs.len() != 0 { self.containers.push(lhs); } } diff --git a/src/bitmap/serialization.rs b/src/bitmap/serialization.rs index 5eb1c240..5fc9b3c8 100644 --- a/src/bitmap/serialization.rs +++ b/src/bitmap/serialization.rs @@ -3,7 +3,7 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use std::io; use super::container::Container; -use super::store::Store; +use crate::bitmap::store::{ArrayStore, BitmapStore, Store}; use crate::RoaringBitmap; const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; @@ -32,7 +32,7 @@ impl RoaringBitmap { .containers .iter() .map(|container| match container.store { - Store::Array(ref values) => 8 + values.len() * 2, + Store::Array(ref values) => 8 + values.len() as usize * 2, Store::Bitmap(..) 
=> 8 + 8 * 1024, }) .sum(); @@ -64,7 +64,7 @@ impl RoaringBitmap { for container in &self.containers { writer.write_u16::(container.key)?; - writer.write_u16::((container.len - 1) as u16)?; + writer.write_u16::((container.len() - 1) as u16)?; } let mut offset = 8 + 8 * self.containers.len() as u32; @@ -83,12 +83,12 @@ impl RoaringBitmap { for container in &self.containers { match container.store { Store::Array(ref values) => { - for &value in values { + for &value in values.iter() { writer.write_u16::(value)?; } } - Store::Bitmap(ref values) => { - for &value in values.iter() { + Store::Bitmap(ref bits) => { + for &value in bits.as_array() { writer.write_u64::(value)?; } } @@ -152,15 +152,15 @@ impl RoaringBitmap { let mut values = vec![0; len as usize]; reader.read_exact(cast_slice_mut(&mut values))?; values.iter_mut().for_each(|n| *n = u16::from_le(*n)); - Store::Array(values) + Store::Array(ArrayStore::from_vec_unchecked(values)) } else { let mut values = Box::new([0; 1024]); reader.read_exact(cast_slice_mut(&mut values[..]))?; values.iter_mut().for_each(|n| *n = u64::from_le(*n)); - Store::Bitmap(values) + Store::Bitmap(BitmapStore::from_unchecked(len, values)) }; - containers.push(Container { key, len, store }); + containers.push(Container { key, store }); } Ok(RoaringBitmap { containers }) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs deleted file mode 100644 index a4f2e83f..00000000 --- a/src/bitmap/store.rs +++ /dev/null @@ -1,1022 +0,0 @@ -use std::borrow::Borrow; -use std::cmp::Ordering::{Equal, Greater, Less}; -use std::mem; -use std::ops::{ - BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, -}; -use std::{slice, vec}; - -use self::Store::{Array, Bitmap}; - -const BITMAP_LENGTH: usize = 1024; - -pub enum Store { - Array(Vec), - Bitmap(Box<[u64; BITMAP_LENGTH]>), -} - -pub enum Iter<'a> { - Array(slice::Iter<'a, u16>), - Vec(vec::IntoIter), - BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), - 
BitmapOwned(BitmapIter>), -} - -pub struct BitmapIter> { - key: usize, - value: u64, - bits: B, -} - -impl Store { - pub fn insert(&mut self, index: u16) -> bool { - match *self { - Array(ref mut vec) => { - vec.binary_search(&index).map_err(|loc| vec.insert(loc, index)).is_err() - } - Bitmap(ref mut bits) => { - let (key, bit) = (key(index), bit(index)); - if bits[key] & (1 << bit) == 0 { - bits[key] |= 1 << bit; - true - } else { - false - } - } - } - } - - pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { - // A Range is defined as being of size 0 if start >= end. - if range.is_empty() { - return 0; - } - - let start = *range.start(); - let end = *range.end(); - - match *self { - Array(ref mut vec) => { - // Figure out the starting/ending position in the vec. - let pos_start = vec.binary_search(&start).unwrap_or_else(|x| x); - let pos_end = vec - .binary_search_by(|p| { - // binary search the right most position when equals - match p.cmp(&end) { - Greater => Greater, - _ => Less, - } - }) - .unwrap_or_else(|x| x); - - // Overwrite the range in the middle - there's no need to take - // into account any existing elements between start and end, as - // they're all being added to the set. 
- let dropped = vec.splice(pos_start..pos_end, start..=end); - - end as u64 - start as u64 + 1 - dropped.len() as u64 - } - Bitmap(ref mut bits) => { - let (start_key, start_bit) = (key(start), bit(start)); - let (end_key, end_bit) = (key(end), bit(end)); - - // MSB > start_bit > end_bit > LSB - if start_key == end_key { - // Set the end_bit -> LSB to 1 - let mut mask = if end_bit == 63 { u64::MAX } else { (1 << (end_bit + 1)) - 1 }; - // Set MSB -> start_bit to 1 - mask &= !((1 << start_bit) - 1); - - let existed = (bits[start_key] & mask).count_ones(); - bits[start_key] |= mask; - - return u64::from(end - start + 1) - u64::from(existed); - } - - // Mask off the left-most bits (MSB -> start_bit) - let mask = !((1 << start_bit) - 1); - - // Keep track of the number of bits that were already set to - // return how many new bits were set later - let mut existed = (bits[start_key] & mask).count_ones(); - - bits[start_key] |= mask; - - // Set the full blocks, tracking the number of set bits - for i in (start_key + 1)..end_key { - existed += bits[i].count_ones(); - bits[i] = u64::MAX; - } - - // Set the end bits in the last chunk (MSB -> end_bit) - let mask = if end_bit == 63 { u64::MAX } else { (1 << (end_bit + 1)) - 1 }; - existed += (bits[end_key] & mask).count_ones(); - bits[end_key] |= mask; - - end as u64 - start as u64 + 1 - existed as u64 - } - } - } - - /// Push `index` at the end of the store only if `index` is the new max. - /// - /// Returns whether `index` was effectively pushed. 
- pub fn push(&mut self, index: u16) -> bool { - if self.max().map_or(true, |max| max < index) { - match self { - Array(vec) => vec.push(index), - Bitmap(bits) => { - let (key, bit) = (key(index), bit(index)); - bits[key] |= 1 << bit; - } - } - true - } else { - false - } - } - - pub fn remove(&mut self, index: u16) -> bool { - match *self { - Array(ref mut vec) => vec.binary_search(&index).map(|loc| vec.remove(loc)).is_ok(), - Bitmap(ref mut bits) => { - let (key, bit) = (key(index), bit(index)); - if bits[key] & (1 << bit) != 0 { - bits[key] &= !(1 << bit); - true - } else { - false - } - } - } - } - - pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { - if range.is_empty() { - return 0; - } - - let start = *range.start(); - let end = *range.end(); - - match *self { - Array(ref mut vec) => { - // Figure out the starting/ending position in the vec. - let pos_start = vec.binary_search(&start).unwrap_or_else(|x| x); - let pos_end = vec - .binary_search_by(|p| { - // binary search the right most position when equals - match p.cmp(&end) { - Greater => Greater, - _ => Less, - } - }) - .unwrap_or_else(|x| x); - vec.drain(pos_start..pos_end); - (pos_end - pos_start) as u64 - } - Bitmap(ref mut bits) => { - let (start_key, start_bit) = (key(start), bit(start)); - let (end_key, end_bit) = (key(end), bit(end)); - - if start_key == end_key { - let mask = (u64::MAX << start_bit) & (u64::MAX >> (63 - end_bit)); - let removed = (bits[start_key] & mask).count_ones(); - bits[start_key] &= !mask; - return u64::from(removed); - } - - let mut removed = 0; - // start key bits - removed += (bits[start_key] & (u64::MAX << start_bit)).count_ones(); - bits[start_key] &= !(u64::MAX << start_bit); - // counts bits in between - for word in &bits[start_key + 1..end_key] { - removed += word.count_ones(); - // When popcnt is available zeroing in this loop is faster, - // but we opt to perform reasonably on most cpus by zeroing after. 
- // By doing that the compiler uses simd to count ones. - } - // do zeroing outside the loop - for word in &mut bits[start_key + 1..end_key] { - *word = 0; - } - // end key bits - removed += (bits[end_key] & (u64::MAX >> (63 - end_bit))).count_ones(); - bits[end_key] &= !(u64::MAX >> (63 - end_bit)); - u64::from(removed) - } - } - } - - pub fn contains(&self, index: u16) -> bool { - match *self { - Array(ref vec) => vec.binary_search(&index).is_ok(), - Bitmap(ref bits) => bits[key(index)] & (1 << bit(index)) != 0, - } - } - - pub fn is_disjoint<'a>(&'a self, other: &'a Self) -> bool { - match (self, other) { - (&Array(ref vec1), &Array(ref vec2)) => { - let (mut i1, mut i2) = (vec1.iter(), vec2.iter()); - let (mut value1, mut value2) = (i1.next(), i2.next()); - loop { - match value1.and_then(|v1| value2.map(|v2| v1.cmp(v2))) { - None => return true, - Some(Equal) => return false, - Some(Less) => value1 = i1.next(), - Some(Greater) => value2 = i2.next(), - } - } - } - (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { - bits1.iter().zip(bits2.iter()).all(|(&i1, &i2)| (i1 & i2) == 0) - } - (&Array(ref vec), store @ &Bitmap(..)) | (store @ &Bitmap(..), &Array(ref vec)) => { - vec.iter().all(|&i| !store.contains(i)) - } - } - } - - pub fn is_subset(&self, other: &Self) -> bool { - match (self, other) { - (&Array(ref vec1), &Array(ref vec2)) => { - let (mut i1, mut i2) = (vec1.iter(), vec2.iter()); - let (mut value1, mut value2) = (i1.next(), i2.next()); - loop { - match (value1, value2) { - (None, _) => return true, - (Some(..), None) => return false, - (Some(v1), Some(v2)) => match v1.cmp(v2) { - Equal => { - value1 = i1.next(); - value2 = i2.next(); - } - Less => return false, - Greater => value2 = i2.next(), - }, - } - } - } - (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { - bits1.iter().zip(bits2.iter()).all(|(&i1, &i2)| (i1 & i2) == i1) - } - (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), - (&Bitmap(..), &Array(..)) => false, - } - } 
- - pub fn to_array(&self) -> Self { - match *self { - Array(..) => panic!("Cannot convert array to array"), - Bitmap(ref bits) => { - let mut vec = Vec::new(); - for (index, mut bit) in bits.iter().cloned().enumerate() { - while bit != 0 { - vec.push((u64::trailing_zeros(bit) + (64 * index as u32)) as u16); - bit &= bit - 1; - } - } - Array(vec) - } - } - } - - pub fn to_bitmap(&self) -> Self { - match *self { - Array(ref vec) => { - let mut bits = Box::new([0; BITMAP_LENGTH]); - for &index in vec { - bits[key(index)] |= 1 << bit(index); - } - Bitmap(bits) - } - Bitmap(..) => panic!("Cannot convert bitmap to bitmap"), - } - } - - pub fn len(&self) -> u64 { - match *self { - Array(ref vec) => vec.len() as u64, - Bitmap(ref bits) => bits.iter().map(|bit| u64::from(bit.count_ones())).sum(), - } - } - - pub fn min(&self) -> Option { - match *self { - Array(ref vec) => vec.first().copied(), - Bitmap(ref bits) => bits - .iter() - .enumerate() - .find(|&(_, &bit)| bit != 0) - .map(|(index, bit)| (index * 64 + (bit.trailing_zeros() as usize)) as u16), - } - } - - pub fn max(&self) -> Option { - match *self { - Array(ref vec) => vec.last().copied(), - Bitmap(ref bits) => bits - .iter() - .enumerate() - .rev() - .find(|&(_, &bit)| bit != 0) - .map(|(index, bit)| (index * 64 + (63 - bit.leading_zeros() as usize)) as u16), - } - } -} - -impl BitOr<&Store> for &Store { - type Output = Store; - - fn bitor(self, rhs: &Store) -> Store { - match (self, rhs) { - (&Array(ref vec1), &Array(ref vec2)) => Array(union_arrays(vec1, vec2)), - (&Bitmap(_), &Array(_)) => { - let mut lhs = self.clone(); - BitOrAssign::bitor_assign(&mut lhs, rhs); - lhs - } - (&Bitmap(_), &Bitmap(_)) => { - let mut lhs = self.clone(); - BitOrAssign::bitor_assign(&mut lhs, rhs); - lhs - } - (&Array(_), &Bitmap(_)) => { - let mut rhs = rhs.clone(); - BitOrAssign::bitor_assign(&mut rhs, self); - rhs - } - } - } -} - -impl BitOrAssign for Store { - fn bitor_assign(&mut self, mut rhs: Store) { - match (self, &mut 
rhs) { - (&mut Array(ref mut vec1), &mut Array(ref vec2)) => { - *vec1 = union_arrays(vec1, vec2); - } - (this @ &mut Bitmap(..), &mut Array(ref vec)) => { - vec.iter().for_each(|index| { - this.insert(*index); - }); - } - (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - BitOrAssign::bitor_assign(index1, index2); - } - } - (this @ &mut Array(..), &mut Bitmap(..)) => { - mem::swap(this, &mut rhs); - BitOrAssign::bitor_assign(this, rhs); - } - } - } -} - -impl BitOrAssign<&Store> for Store { - fn bitor_assign(&mut self, rhs: &Store) { - match (self, rhs) { - (&mut Array(ref mut vec1), &Array(ref vec2)) => { - let this = mem::take(vec1); - *vec1 = union_arrays(&this, vec2); - } - (this @ &mut Bitmap(..), &Array(ref vec)) => { - vec.iter().for_each(|index| { - this.insert(*index); - }); - } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - BitOrAssign::bitor_assign(index1, index2); - } - } - (this @ &mut Array(..), &Bitmap(..)) => { - *this = this.to_bitmap(); - BitOrAssign::bitor_assign(this, rhs); - } - } - } -} - -impl BitAnd<&Store> for &Store { - type Output = Store; - - fn bitand(self, rhs: &Store) -> Store { - match (self, rhs) { - (&Array(ref vec1), &Array(ref vec2)) => Array(intersect_arrays(vec1, vec2)), - (&Bitmap(_), &Array(_)) => { - let mut rhs = rhs.clone(); - BitAndAssign::bitand_assign(&mut rhs, self); - rhs - } - _ => { - let mut lhs = self.clone(); - BitAndAssign::bitand_assign(&mut lhs, rhs); - lhs - } - } - } -} - -impl BitAndAssign for Store { - #[allow(clippy::suspicious_op_assign_impl)] - fn bitand_assign(&mut self, mut rhs: Store) { - match (self, &mut rhs) { - (&mut Array(ref mut lhs), &mut Array(ref mut rhs)) => { - if rhs.len() < lhs.len() { - mem::swap(lhs, rhs); - } - - let mut i = 0; - lhs.retain(|x| { - i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.len()); - rhs.get(i).map_or(false, 
|y| x == y) - }); - } - (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - BitAndAssign::bitand_assign(index1, index2); - } - } - (&mut Array(ref mut vec), store @ &mut Bitmap(..)) => { - vec.retain(|x| store.contains(*x)); - } - (this @ &mut Bitmap(..), &mut Array(..)) => { - mem::swap(this, &mut rhs); - BitAndAssign::bitand_assign(this, rhs); - } - } - } -} - -impl BitAndAssign<&Store> for Store { - #[allow(clippy::suspicious_op_assign_impl)] - fn bitand_assign(&mut self, rhs: &Store) { - match (self, rhs) { - (&mut Array(ref mut vec1), &Array(ref vec2)) => { - let (mut lhs, rhs) = if vec1.len() <= vec2.len() { - (mem::take(vec1), vec2.as_slice()) - } else { - (vec2.clone(), vec1.as_slice()) - }; - - let mut i = 0; - lhs.retain(|x| { - i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.len()); - rhs.get(i).map_or(false, |y| x == y) - }); - - *vec1 = lhs; - } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - BitAndAssign::bitand_assign(index1, index2); - } - } - (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - vec.retain(|x| store.contains(*x)); - } - (this @ &mut Bitmap(..), &Array(..)) => { - let mut new = rhs.clone(); - BitAndAssign::bitand_assign(&mut new, &*this); - *this = new; - } - } - } -} - -impl Sub<&Store> for &Store { - type Output = Store; - - fn sub(self, rhs: &Store) -> Store { - match (self, rhs) { - (&Array(ref vec1), &Array(ref vec2)) => Array(difference_arrays(vec1, vec2)), - _ => { - let mut lhs = self.clone(); - SubAssign::sub_assign(&mut lhs, rhs); - lhs - } - } - } -} - -impl SubAssign<&Store> for Store { - fn sub_assign(&mut self, rhs: &Store) { - match (self, rhs) { - (&mut Array(ref mut lhs), &Array(ref rhs)) => { - let mut i = 0; - lhs.retain(|x| { - i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.len()); - rhs.get(i).map_or(true, |y| x != y) - }); - } - (ref mut this @ 
&mut Bitmap(..), &Array(ref vec2)) => { - vec2.iter().for_each(|index| { - this.remove(*index); - }); - } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - *index1 &= !*index2; - } - } - (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - vec.retain(|x| !store.contains(*x)); - } - } - } -} - -impl BitXor<&Store> for &Store { - type Output = Store; - - fn bitxor(self, rhs: &Store) -> Store { - match (self, rhs) { - (&Array(ref vec1), &Array(ref vec2)) => Array(symmetric_difference_arrays(vec1, vec2)), - (&Array(_), &Bitmap(_)) => { - let mut lhs = rhs.clone(); - BitXorAssign::bitxor_assign(&mut lhs, self); - lhs - } - _ => { - let mut lhs = self.clone(); - BitXorAssign::bitxor_assign(&mut lhs, rhs); - lhs - } - } - } -} - -impl BitXorAssign for Store { - fn bitxor_assign(&mut self, mut rhs: Store) { - // TODO improve this function - match (self, &mut rhs) { - (&mut Array(ref mut vec1), &mut Array(ref mut vec2)) => { - let mut i1 = 0usize; - let mut iter2 = vec2.iter(); - let mut current2 = iter2.next(); - while i1 < vec1.len() { - match current2.map(|c2| vec1[i1].cmp(c2)) { - None => break, - Some(Less) => { - i1 += 1; - } - Some(Greater) => { - vec1.insert(i1, *current2.unwrap()); - i1 += 1; - current2 = iter2.next(); - } - Some(Equal) => { - vec1.remove(i1); - current2 = iter2.next(); - } - } - } - if let Some(current) = current2 { - vec1.push(*current); - vec1.extend(iter2.cloned()); - } - } - (ref mut this @ &mut Bitmap(..), &mut Array(ref mut vec2)) => { - for index in vec2 { - if this.contains(*index) { - this.remove(*index); - } else { - this.insert(*index); - } - } - } - (&mut Bitmap(ref mut bits1), &mut Bitmap(ref mut bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - BitXorAssign::bitxor_assign(index1, index2); - } - } - (this @ &mut Array(..), &mut Bitmap(..)) => { - mem::swap(this, &mut rhs); - BitXorAssign::bitxor_assign(this, rhs); - } - } - } -} - -impl 
BitXorAssign<&Store> for Store { - fn bitxor_assign(&mut self, rhs: &Store) { - match (self, rhs) { - (&mut Array(ref mut vec1), &Array(ref vec2)) => { - let mut i1 = 0usize; - let mut iter2 = vec2.iter(); - let mut current2 = iter2.next(); - while i1 < vec1.len() { - match current2.map(|c2| vec1[i1].cmp(c2)) { - None => break, - Some(Less) => { - i1 += 1; - } - Some(Greater) => { - vec1.insert(i1, *current2.unwrap()); - i1 += 1; - current2 = iter2.next(); - } - Some(Equal) => { - vec1.remove(i1); - current2 = iter2.next(); - } - } - } - if let Some(current) = current2 { - vec1.push(*current); - vec1.extend(iter2.cloned()); - } - } - (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { - for index in vec2.iter() { - if this.contains(*index) { - this.remove(*index); - } else { - this.insert(*index); - } - } - } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, index2) in bits1.iter_mut().zip(bits2.iter()) { - BitXorAssign::bitxor_assign(index1, index2); - } - } - (this @ &mut Array(..), &Bitmap(..)) => { - let mut new = rhs.clone(); - BitXorAssign::bitxor_assign(&mut new, &*this); - *this = new; - } - } - } -} - -impl<'a> IntoIterator for &'a Store { - type Item = u16; - type IntoIter = Iter<'a>; - fn into_iter(self) -> Iter<'a> { - match *self { - Array(ref vec) => Iter::Array(vec.iter()), - Bitmap(ref bits) => Iter::BitmapBorrowed(BitmapIter::new(&**bits)), - } - } -} - -impl IntoIterator for Store { - type Item = u16; - type IntoIter = Iter<'static>; - fn into_iter(self) -> Iter<'static> { - match self { - Array(vec) => Iter::Vec(vec.into_iter()), - Bitmap(bits) => Iter::BitmapOwned(BitmapIter::new(bits)), - } - } -} - -impl PartialEq for Store { - fn eq(&self, other: &Self) -> bool { - match (self, other) { - (&Array(ref vec1), &Array(ref vec2)) => vec1 == vec2, - (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { - bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) - } - _ => false, - } - } -} - -impl Clone for Store { - fn clone(&self) 
-> Self { - match *self { - Array(ref vec) => Array(vec.clone()), - Bitmap(ref bits) => Bitmap(Box::new(**bits)), - } - } -} - -impl> BitmapIter { - fn new(bits: B) -> BitmapIter { - BitmapIter { key: 0, value: bits.borrow()[0], bits } - } -} - -impl> Iterator for BitmapIter { - type Item = u16; - - fn next(&mut self) -> Option { - loop { - if self.value == 0 { - self.key += 1; - if self.key >= BITMAP_LENGTH { - return None; - } - self.value = unsafe { *self.bits.borrow().get_unchecked(self.key) }; - continue; - } - let index = self.value.trailing_zeros() as usize; - self.value &= self.value - 1; - return Some((64 * self.key + index) as u16); - } - } - - fn size_hint(&self) -> (usize, Option) { - panic!("Should never be called (roaring::Iter caches the size_hint itself)") - } -} - -impl<'a> Iterator for Iter<'a> { - type Item = u16; - - fn next(&mut self) -> Option { - match *self { - Iter::Array(ref mut inner) => inner.next().cloned(), - Iter::Vec(ref mut inner) => inner.next(), - Iter::BitmapBorrowed(ref mut inner) => inner.next(), - Iter::BitmapOwned(ref mut inner) => inner.next(), - } - } - - fn size_hint(&self) -> (usize, Option) { - panic!("Should never be called (roaring::Iter caches the size_hint itself)") - } -} - -#[inline] -fn union_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { - let len = (arr1.len() + arr2.len()).min(4096); - let mut out = Vec::with_capacity(len); - - // Traverse both arrays - let mut i = 0; - let mut j = 0; - while i < arr1.len() && j < arr2.len() { - let a = unsafe { arr1.get_unchecked(i) }; - let b = unsafe { arr2.get_unchecked(j) }; - match a.cmp(b) { - Less => { - out.push(*a); - i += 1; - } - Greater => { - out.push(*b); - j += 1; - } - Equal => { - out.push(*a); - i += 1; - j += 1; - } - } - } - - // Store remaining elements of the arrays - out.extend_from_slice(&arr1[i..]); - out.extend_from_slice(&arr2[j..]); - - out -} - -#[inline] -fn intersect_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { - let mut out = Vec::new(); - - // 
Traverse both arrays - let mut i = 0; - let mut j = 0; - while i < arr1.len() && j < arr2.len() { - let a = unsafe { arr1.get_unchecked(i) }; - let b = unsafe { arr2.get_unchecked(j) }; - match a.cmp(b) { - Less => i += 1, - Greater => j += 1, - Equal => { - out.push(*a); - i += 1; - j += 1; - } - } - } - - out -} - -#[inline] -fn difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { - let mut out = Vec::new(); - - // Traverse both arrays - let mut i = 0; - let mut j = 0; - while i < arr1.len() && j < arr2.len() { - let a = unsafe { arr1.get_unchecked(i) }; - let b = unsafe { arr2.get_unchecked(j) }; - match a.cmp(b) { - Less => { - out.push(*a); - i += 1; - } - Greater => j += 1, - Equal => { - i += 1; - j += 1; - } - } - } - - // Store remaining elements of the left array - out.extend_from_slice(&arr1[i..]); - - out -} - -#[inline] -fn symmetric_difference_arrays(arr1: &[u16], arr2: &[u16]) -> Vec { - let mut out = Vec::new(); - - // Traverse both arrays - let mut i = 0; - let mut j = 0; - while i < arr1.len() && j < arr2.len() { - let a = unsafe { arr1.get_unchecked(i) }; - let b = unsafe { arr2.get_unchecked(j) }; - match a.cmp(b) { - Less => { - out.push(*a); - i += 1; - } - Greater => { - out.push(*b); - j += 1; - } - Equal => { - i += 1; - j += 1; - } - } - } - - // Store remaining elements of the arrays - out.extend_from_slice(&arr1[i..]); - out.extend_from_slice(&arr2[j..]); - - out -} - -#[inline] -fn key(index: u16) -> usize { - index as usize / 64 -} - -#[inline] -fn bit(index: u16) -> usize { - index as usize % 64 -} - -#[cfg(test)] -mod tests { - use super::*; - - fn as_vec(s: Store) -> Vec { - if let Store::Array(v) = s { - return v; - } - as_vec(s.to_array()) - } - - #[test] - #[allow(clippy::reversed_empty_ranges)] - fn test_array_insert_invalid_range() { - let mut store = Store::Array(vec![1, 2, 8, 9]); - - // Insert a range with start > end. 
- let new = store.insert_range(6..=1); - assert_eq!(new, 0); - - assert_eq!(as_vec(store), vec![1, 2, 8, 9]); - } - - #[test] - fn test_array_insert_range() { - let mut store = Store::Array(vec![1, 2, 8, 9]); - - let new = store.insert_range(4..=5); - assert_eq!(new, 2); - - assert_eq!(as_vec(store), vec![1, 2, 4, 5, 8, 9]); - } - - #[test] - fn test_array_insert_range_left_overlap() { - let mut store = Store::Array(vec![1, 2, 8, 9]); - - let new = store.insert_range(2..=5); - assert_eq!(new, 3); - - assert_eq!(as_vec(store), vec![1, 2, 3, 4, 5, 8, 9]); - } - - #[test] - fn test_array_insert_range_right_overlap() { - let mut store = Store::Array(vec![1, 2, 8, 9]); - - let new = store.insert_range(4..=8); - assert_eq!(new, 4); - - assert_eq!(as_vec(store), vec![1, 2, 4, 5, 6, 7, 8, 9]); - } - - #[test] - fn test_array_insert_range_full_overlap() { - let mut store = Store::Array(vec![1, 2, 8, 9]); - - let new = store.insert_range(1..=9); - assert_eq!(new, 5); - - assert_eq!(as_vec(store), vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); - } - - #[test] - #[allow(clippy::reversed_empty_ranges)] - fn test_bitmap_insert_invalid_range() { - let store = Store::Array(vec![1, 2, 8, 9]); - let mut store = store.to_bitmap(); - - // Insert a range with start > end. 
- let new = store.insert_range(6..=1); - assert_eq!(new, 0); - - assert_eq!(as_vec(store), vec![1, 2, 8, 9]); - } - - #[test] - fn test_bitmap_insert_same_key_overlap() { - let store = Store::Array(vec![1, 2, 3, 62, 63]); - let mut store = store.to_bitmap(); - - let new = store.insert_range(1..=62); - assert_eq!(new, 58); - - assert_eq!(as_vec(store), (1..64).collect::>()); - } - - #[test] - fn test_bitmap_insert_range() { - let store = Store::Array(vec![1, 2, 130]); - let mut store = store.to_bitmap(); - - let new = store.insert_range(4..=128); - assert_eq!(new, 125); - - let mut want = vec![1, 2]; - want.extend(4..129); - want.extend(&[130]); - - assert_eq!(as_vec(store), want); - } - - #[test] - fn test_bitmap_insert_range_left_overlap() { - let store = Store::Array(vec![1, 2, 130]); - let mut store = store.to_bitmap(); - - let new = store.insert_range(1..=128); - assert_eq!(new, 126); - - let mut want = Vec::new(); - want.extend(1..129); - want.extend(&[130]); - - assert_eq!(as_vec(store), want); - } - - #[test] - fn test_bitmap_insert_range_right_overlap() { - let store = Store::Array(vec![1, 2, 130]); - let mut store = store.to_bitmap(); - - let new = store.insert_range(4..=132); - assert_eq!(new, 128); - - let mut want = vec![1, 2]; - want.extend(4..133); - - assert_eq!(as_vec(store), want); - } - - #[test] - fn test_bitmap_insert_range_full_overlap() { - let store = Store::Array(vec![1, 2, 130]); - let mut store = store.to_bitmap(); - - let new = store.insert_range(1..=134); - assert_eq!(new, 131); - - let mut want = Vec::new(); - want.extend(1..135); - - assert_eq!(as_vec(store), want); - } -} diff --git a/src/bitmap/store/array_store.rs b/src/bitmap/store/array_store.rs new file mode 100644 index 00000000..6e2b78d3 --- /dev/null +++ b/src/bitmap/store/array_store.rs @@ -0,0 +1,569 @@ +use std::cmp::Ordering; +use std::cmp::Ordering::*; +use std::convert::{TryFrom, TryInto}; +use std::fmt::{Display, Formatter}; +use std::ops::{BitAnd, BitAndAssign, BitOr, 
BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign}; + +use super::bitmap_store::{bit, key, BitmapStore, BITMAP_LENGTH}; + +#[derive(Clone, Eq, PartialEq)] +pub struct ArrayStore { + vec: Vec, +} + +impl ArrayStore { + pub fn new() -> ArrayStore { + ArrayStore { vec: vec![] } + } + + /// + /// Create a new SortedU16Vec from a given vec + /// It is up to the caller to ensure the vec is sorted and deduplicated + /// Favor `try_from` / `try_into` for cases in which these invariants should be checked + /// + /// # Panics + /// + /// When debug_assertions are enabled and the above invariants are not met + pub fn from_vec_unchecked(vec: Vec) -> ArrayStore { + if cfg!(debug_assertions) { + vec.try_into().unwrap() + } else { + ArrayStore { vec } + } + } + + pub fn insert(&mut self, index: u16) -> bool { + self.vec.binary_search(&index).map_err(|loc| self.vec.insert(loc, index)).is_err() + } + + pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + let start = *range.start(); + let end = *range.end(); + + // Figure out the starting/ending position in the vec. + let pos_start = self.vec.binary_search(&start).unwrap_or_else(|x| x); + let pos_end = self + .vec + .binary_search_by(|p| { + // binary search the right most position when equals + match p.cmp(&end) { + Greater => Greater, + _ => Less, + } + }) + .unwrap_or_else(|x| x); + + // Overwrite the range in the middle - there's no need to take + // into account any existing elements between start and end, as + // they're all being added to the set. 
+ let dropped = self.vec.splice(pos_start..pos_end, start..=end); + + end as u64 - start as u64 + 1 - dropped.len() as u64 + } + + pub fn push(&mut self, index: u16) -> bool { + if self.max().map_or(true, |max| max < index) { + self.vec.push(index); + true + } else { + false + } + } + + pub fn remove(&mut self, index: u16) -> bool { + self.vec.binary_search(&index).map(|loc| self.vec.remove(loc)).is_ok() + } + + pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + let start = *range.start(); + let end = *range.end(); + + // Figure out the starting/ending position in the vec. + let pos_start = self.vec.binary_search(&start).unwrap_or_else(|x| x); + let pos_end = self + .vec + .binary_search_by(|p| { + // binary search the right most position when equals + match p.cmp(&end) { + Greater => Greater, + _ => Less, + } + }) + .unwrap_or_else(|x| x); + self.vec.drain(pos_start..pos_end); + (pos_end - pos_start) as u64 + } + + pub fn contains(&self, index: u16) -> bool { + self.vec.binary_search(&index).is_ok() + } + + pub fn is_disjoint(&self, other: &Self) -> bool { + let (mut i1, mut i2) = (self.vec.iter(), other.vec.iter()); + let (mut value1, mut value2) = (i1.next(), i2.next()); + loop { + match value1.and_then(|v1| value2.map(|v2| v1.cmp(v2))) { + None => return true, + Some(Equal) => return false, + Some(Less) => value1 = i1.next(), + Some(Greater) => value2 = i2.next(), + } + } + } + + pub fn is_subset(&self, other: &Self) -> bool { + let (mut i1, mut i2) = (self.iter(), other.iter()); + let (mut value1, mut value2) = (i1.next(), i2.next()); + loop { + match (value1, value2) { + (None, _) => return true, + (Some(..), None) => return false, + (Some(v1), Some(v2)) => match v1.cmp(v2) { + Equal => { + value1 = i1.next(); + value2 = i2.next(); + } + Less => return false, + Greater => value2 = i2.next(), + }, + } + } + } + + pub fn to_bitmap_store(&self) -> BitmapStore { + let mut bits = Box::new([0; BITMAP_LENGTH]); + let len = self.len() as u64; + + for 
&index in self.iter() { + bits[key(index)] |= 1 << bit(index); + } + BitmapStore::from_unchecked(len, bits) + } + + pub fn len(&self) -> u64 { + self.vec.len() as u64 + } + + pub fn min(&self) -> Option { + self.vec.first().copied() + } + + pub fn max(&self) -> Option { + self.vec.last().copied() + } + + pub fn iter(&self) -> std::slice::Iter { + self.vec.iter() + } + + pub fn into_iter(self) -> std::vec::IntoIter { + self.vec.into_iter() + } + + pub fn as_slice(&self) -> &[u16] { + &self.vec + } +} + +impl Default for ArrayStore { + fn default() -> Self { + ArrayStore::new() + } +} + +#[derive(Debug)] +pub struct Error { + index: usize, + kind: ErrorKind, +} + +#[derive(Debug)] +pub enum ErrorKind { + Duplicate, + OutOfOrder, +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.kind { + ErrorKind::Duplicate => { + write!(f, "Duplicate element found at index: {}", self.index) + } + ErrorKind::OutOfOrder => { + write!(f, "An element was out of order at index: {}", self.index) + } + } + } +} + +impl std::error::Error for Error {} + +impl TryFrom> for ArrayStore { + type Error = Error; + + fn try_from(value: Vec) -> Result { + let mut iter = value.iter().enumerate(); + if let Some((_, mut prev)) = iter.next() { + for (i, cur) in iter { + match cur.cmp(prev) { + Ordering::Less => return Err(Error { index: i, kind: ErrorKind::OutOfOrder }), + Ordering::Equal => return Err(Error { index: i, kind: ErrorKind::Duplicate }), + Ordering::Greater => (), + } + prev = cur; + } + } + + Ok(ArrayStore { vec: value }) + } +} + +impl BitOr for &ArrayStore { + type Output = ArrayStore; + + fn bitor(self, rhs: Self) -> Self::Output { + let mut vec = { + let capacity = (self.vec.len() + rhs.vec.len()).min(4096); + Vec::with_capacity(capacity) + }; + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < self.vec.len() && j < rhs.vec.len() { + let a = unsafe { self.vec.get_unchecked(i) }; + let b = unsafe { 
rhs.vec.get_unchecked(j) }; + match a.cmp(b) { + Less => { + vec.push(*a); + i += 1; + } + Greater => { + vec.push(*b); + j += 1; + } + Equal => { + vec.push(*a); + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the arrays + vec.extend_from_slice(&self.vec[i..]); + vec.extend_from_slice(&rhs.vec[j..]); + + ArrayStore { vec } + } +} + +impl BitAnd for &ArrayStore { + type Output = ArrayStore; + + fn bitand(self, rhs: Self) -> Self::Output { + let mut vec = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < self.vec.len() && j < rhs.vec.len() { + let a = unsafe { self.vec.get_unchecked(i) }; + let b = unsafe { rhs.vec.get_unchecked(j) }; + match a.cmp(b) { + Less => i += 1, + Greater => j += 1, + Equal => { + vec.push(*a); + i += 1; + j += 1; + } + } + } + + ArrayStore { vec } + } +} + +impl BitAndAssign<&Self> for ArrayStore { + fn bitand_assign(&mut self, rhs: &Self) { + let mut i = 0; + self.vec.retain(|x| { + i += rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.vec.len()); + rhs.vec.get(i).map_or(false, |y| x == y) + }); + } +} + +impl BitAndAssign<&BitmapStore> for ArrayStore { + fn bitand_assign(&mut self, rhs: &BitmapStore) { + self.vec.retain(|x| rhs.contains(*x)); + } +} + +impl Sub for &ArrayStore { + type Output = ArrayStore; + + fn sub(self, rhs: Self) -> Self::Output { + let mut vec = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < self.vec.len() && j < rhs.vec.len() { + let a = unsafe { self.vec.get_unchecked(i) }; + let b = unsafe { rhs.vec.get_unchecked(j) }; + match a.cmp(b) { + Less => { + vec.push(*a); + i += 1; + } + Greater => j += 1, + Equal => { + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the left array + vec.extend_from_slice(&self.vec[i..]); + + ArrayStore { vec } + } +} + +impl SubAssign<&Self> for ArrayStore { + fn sub_assign(&mut self, rhs: &Self) { + let mut i = 0; + self.vec.retain(|x| { + i += 
rhs.iter().skip(i).position(|y| y >= x).unwrap_or(rhs.vec.len()); + rhs.vec.get(i).map_or(true, |y| x != y) + }); + } +} + +impl SubAssign<&BitmapStore> for ArrayStore { + fn sub_assign(&mut self, rhs: &BitmapStore) { + self.vec.retain(|x| !rhs.contains(*x)); + } +} + +impl BitXor for &ArrayStore { + type Output = ArrayStore; + + fn bitxor(self, rhs: Self) -> Self::Output { + let mut vec = Vec::new(); + + // Traverse both arrays + let mut i = 0; + let mut j = 0; + while i < self.vec.len() && j < rhs.vec.len() { + let a = unsafe { self.vec.get_unchecked(i) }; + let b = unsafe { rhs.vec.get_unchecked(j) }; + match a.cmp(b) { + Less => { + vec.push(*a); + i += 1; + } + Greater => { + vec.push(*b); + j += 1; + } + Equal => { + i += 1; + j += 1; + } + } + } + + // Store remaining elements of the arrays + vec.extend_from_slice(&self.vec[i..]); + vec.extend_from_slice(&rhs.vec[j..]); + + ArrayStore { vec } + } +} + +impl BitXorAssign<&Self> for ArrayStore { + fn bitxor_assign(&mut self, rhs: &Self) { + let mut i1 = 0usize; + let mut iter2 = rhs.vec.iter(); + let mut current2 = iter2.next(); + while i1 < self.vec.len() { + match current2.map(|c2| self.vec[i1].cmp(c2)) { + None => break, + Some(Less) => { + i1 += 1; + } + Some(Greater) => { + self.vec.insert(i1, *current2.unwrap()); + i1 += 1; + current2 = iter2.next(); + } + Some(Equal) => { + self.vec.remove(i1); + current2 = iter2.next(); + } + } + } + if let Some(current) = current2 { + self.vec.push(*current); + self.vec.extend(iter2.cloned()); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::bitmap::store::Store; + + fn into_vec(s: Store) -> Vec { + match s { + Store::Array(vec) => vec.vec, + Store::Bitmap(bits) => bits.to_array_store().vec, + } + } + + fn into_bitmap_store(s: Store) -> Store { + match s { + Store::Array(vec) => Store::Bitmap(vec.to_bitmap_store()), + Store::Bitmap(..) 
=> s, + } + } + + #[test] + #[allow(clippy::reversed_empty_ranges)] + fn test_array_insert_invalid_range() { + let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); + + // Insert a range with start > end. + let new = store.insert_range(6..=1); + assert_eq!(new, 0); + + assert_eq!(into_vec(store), vec![1, 2, 8, 9]); + } + + #[test] + fn test_array_insert_range() { + let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); + + let new = store.insert_range(4..=5); + assert_eq!(new, 2); + + assert_eq!(into_vec(store), vec![1, 2, 4, 5, 8, 9]); + } + + #[test] + fn test_array_insert_range_left_overlap() { + let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); + + let new = store.insert_range(2..=5); + assert_eq!(new, 3); + + assert_eq!(into_vec(store), vec![1, 2, 3, 4, 5, 8, 9]); + } + + #[test] + fn test_array_insert_range_right_overlap() { + let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); + + let new = store.insert_range(4..=8); + assert_eq!(new, 4); + + assert_eq!(into_vec(store), vec![1, 2, 4, 5, 6, 7, 8, 9]); + } + + #[test] + fn test_array_insert_range_full_overlap() { + let mut store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); + + let new = store.insert_range(1..=9); + assert_eq!(new, 5); + + assert_eq!(into_vec(store), vec![1, 2, 3, 4, 5, 6, 7, 8, 9]); + } + + #[test] + #[allow(clippy::reversed_empty_ranges)] + fn test_bitmap_insert_invalid_range() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 8, 9])); + let mut store = into_bitmap_store(store); + + // Insert a range with start > end. 
+ let new = store.insert_range(6..=1); + assert_eq!(new, 0); + + assert_eq!(into_vec(store), vec![1, 2, 8, 9]); + } + + #[test] + fn test_bitmap_insert_same_key_overlap() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 3, 62, 63])); + let mut store = into_bitmap_store(store); + + let new = store.insert_range(1..=62); + assert_eq!(new, 58); + + assert_eq!(into_vec(store), (1..64).collect::>()); + } + + #[test] + fn test_bitmap_insert_range() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 130])); + let mut store = into_bitmap_store(store); + + let new = store.insert_range(4..=128); + assert_eq!(new, 125); + + let mut want = vec![1, 2]; + want.extend(4..129); + want.extend(&[130]); + + assert_eq!(into_vec(store), want); + } + + #[test] + fn test_bitmap_insert_range_left_overlap() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 130])); + let mut store = into_bitmap_store(store); + + let new = store.insert_range(1..=128); + assert_eq!(new, 126); + + let mut want = Vec::new(); + want.extend(1..129); + want.extend(&[130]); + + assert_eq!(into_vec(store), want); + } + + #[test] + fn test_bitmap_insert_range_right_overlap() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 130])); + let mut store = into_bitmap_store(store); + + let new = store.insert_range(4..=132); + assert_eq!(new, 128); + + let mut want = vec![1, 2]; + want.extend(4..133); + + assert_eq!(into_vec(store), want); + } + + #[test] + fn test_bitmap_insert_range_full_overlap() { + let store = Store::Array(ArrayStore::from_vec_unchecked(vec![1, 2, 130])); + let mut store = into_bitmap_store(store); + + let new = store.insert_range(1..=134); + assert_eq!(new, 131); + + let mut want = Vec::new(); + want.extend(1..135); + + assert_eq!(into_vec(store), want); + } +} diff --git a/src/bitmap/store/bitmap_store.rs b/src/bitmap/store/bitmap_store.rs new file mode 100644 index 00000000..4872de2e --- /dev/null +++ 
b/src/bitmap/store/bitmap_store.rs @@ -0,0 +1,360 @@ +use std::borrow::Borrow; +use std::fmt::{Display, Formatter}; +use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign}; + +use super::ArrayStore; + +pub const BITMAP_LENGTH: usize = 1024; + +#[derive(Clone)] +pub struct BitmapStore { + len: u64, + bits: Box<[u64; BITMAP_LENGTH]>, +} + +impl BitmapStore { + pub fn new() -> BitmapStore { + BitmapStore { len: 0, bits: Box::new([0; BITMAP_LENGTH]) } + } + + pub fn try_from(len: u64, bits: Box<[u64; BITMAP_LENGTH]>) -> Result { + let actual_len = bits.iter().map(|v| v.count_ones() as u64).sum(); + if len != actual_len { + Err(Error { kind: ErrorKind::Cardinality { expected: len, actual: actual_len } }) + } else { + Ok(BitmapStore { len, bits }) + } + } + + /// + /// Create a new Bitmap8K from a given len and bits array + /// It is up to the caller to ensure len == cardinality of bits + /// Favor `try_from` for cases in which this invariants should be checked + /// + /// # Panics + /// + /// When debug_assertions are enabled and the above invariant is not met + pub fn from_unchecked(len: u64, bits: Box<[u64; BITMAP_LENGTH]>) -> BitmapStore { + if cfg!(debug_assertions) { + BitmapStore::try_from(len, bits).unwrap() + } else { + BitmapStore { len, bits } + } + } + + pub fn insert(&mut self, index: u16) -> bool { + let (key, bit) = (key(index), bit(index)); + let old_w = self.bits[key]; + let new_w = old_w | 1 << bit; + let inserted = (old_w ^ new_w) >> bit; // 1 or 0 + self.bits[key] = new_w; + self.len += inserted; + inserted != 0 + } + + pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + let start = *range.start(); + let end = *range.end(); + + let (start_key, start_bit) = (key(start), bit(start)); + let (end_key, end_bit) = (key(end), bit(end)); + + // MSB > start_bit > end_bit > LSB + if start_key == end_key { + // Set the end_bit -> LSB to 1 + let mut mask = if end_bit == 63 { u64::MAX } else { (1 << (end_bit + 1)) - 1 }; + 
// Set MSB -> start_bit to 1 + mask &= !((1 << start_bit) - 1); + + let existed = (self.bits[start_key] & mask).count_ones(); + self.bits[start_key] |= mask; + + let inserted = u64::from(end - start + 1) - u64::from(existed); + self.len += inserted; + return inserted; + } + + // Mask off the left-most bits (MSB -> start_bit) + let mask = !((1 << start_bit) - 1); + + // Keep track of the number of bits that were already set to + // return how many new bits were set later + let mut existed = (self.bits[start_key] & mask).count_ones(); + + self.bits[start_key] |= mask; + + // Set the full blocks, tracking the number of set bits + for i in (start_key + 1)..end_key { + existed += self.bits[i].count_ones(); + self.bits[i] = u64::MAX; + } + + // Set the end bits in the last chunk (MSB -> end_bit) + let mask = if end_bit == 63 { u64::MAX } else { (1 << (end_bit + 1)) - 1 }; + existed += (self.bits[end_key] & mask).count_ones(); + self.bits[end_key] |= mask; + + let inserted = end as u64 - start as u64 + 1 - existed as u64; + self.len += inserted; + inserted + } + + pub fn push(&mut self, index: u16) -> bool { + if self.max().map_or(true, |max| max < index) { + self.insert(index); + true + } else { + false + } + } + + pub fn remove(&mut self, index: u16) -> bool { + let (key, bit) = (key(index), bit(index)); + let old_w = self.bits[key]; + let new_w = old_w & !(1 << bit); + let removed = (old_w ^ new_w) >> bit; // 0 or 1 + self.bits[key] = new_w; + self.len -= removed; + removed != 0 + } + + pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + let start = *range.start(); + let end = *range.end(); + + let (start_key, start_bit) = (key(start), bit(start)); + let (end_key, end_bit) = (key(end), bit(end)); + + if start_key == end_key { + let mask = (u64::MAX << start_bit) & (u64::MAX >> (63 - end_bit)); + let removed = (self.bits[start_key] & mask).count_ones(); + self.bits[start_key] &= !mask; + let removed = u64::from(removed); + self.len -= removed; + return 
removed; + } + + let mut removed = 0; + // start key bits + removed += (self.bits[start_key] & (u64::MAX << start_bit)).count_ones(); + self.bits[start_key] &= !(u64::MAX << start_bit); + // counts bits in between + for word in &self.bits[start_key + 1..end_key] { + removed += word.count_ones(); + // When popcnt is available zeroing in this loop is faster, + // but we opt to perform reasonably on most cpus by zeroing after. + // By doing that the compiler uses simd to count ones. + } + // do zeroing outside the loop + for word in &mut self.bits[start_key + 1..end_key] { + *word = 0; + } + // end key bits + removed += (self.bits[end_key] & (u64::MAX >> (63 - end_bit))).count_ones(); + self.bits[end_key] &= !(u64::MAX >> (63 - end_bit)); + let removed = u64::from(removed); + self.len -= removed; + removed + } + + pub fn contains(&self, index: u16) -> bool { + self.bits[key(index)] & (1 << bit(index)) != 0 + } + + pub fn is_disjoint(&self, other: &BitmapStore) -> bool { + self.bits.iter().zip(other.bits.iter()).all(|(&i1, &i2)| (i1 & i2) == 0) + } + + pub fn is_subset(&self, other: &Self) -> bool { + self.bits.iter().zip(other.bits.iter()).all(|(&i1, &i2)| (i1 & i2) == i1) + } + + pub fn to_array_store(&self) -> ArrayStore { + let mut vec = Vec::with_capacity(self.len as usize); + for (index, mut bit) in self.bits.iter().cloned().enumerate() { + while bit != 0 { + vec.push((u64::trailing_zeros(bit) + (64 * index as u32)) as u16); + bit &= bit - 1; + } + } + ArrayStore::from_vec_unchecked(vec) + } + + pub fn len(&self) -> u64 { + self.len + } + + pub fn min(&self) -> Option { + self.bits + .iter() + .enumerate() + .find(|&(_, &bit)| bit != 0) + .map(|(index, bit)| (index * 64 + (bit.trailing_zeros() as usize)) as u16) + } + + pub fn max(&self) -> Option { + self.bits + .iter() + .enumerate() + .rev() + .find(|&(_, &bit)| bit != 0) + .map(|(index, bit)| (index * 64 + (63 - bit.leading_zeros() as usize)) as u16) + } + + pub fn iter(&self) -> BitmapIter<&[u64; 
BITMAP_LENGTH]> { + BitmapIter::new(&self.bits) + } + + pub fn into_iter(self) -> BitmapIter> { + BitmapIter::new(self.bits) + } + + pub fn as_array(&self) -> &[u64; BITMAP_LENGTH] { + &self.bits + } +} + +impl Default for BitmapStore { + fn default() -> Self { + BitmapStore::new() + } +} + +#[derive(Debug)] +pub struct Error { + kind: ErrorKind, +} + +#[derive(Debug)] +pub enum ErrorKind { + Cardinality { expected: u64, actual: u64 }, +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.kind { + ErrorKind::Cardinality { expected, actual } => { + write!(f, "Expected cardinality was {} but was {}", expected, actual) + } + } + } +} + +impl std::error::Error for Error {} + +pub struct BitmapIter> { + key: usize, + value: u64, + bits: B, +} + +impl> BitmapIter { + fn new(bits: B) -> BitmapIter { + BitmapIter { key: 0, value: bits.borrow()[0], bits } + } +} + +impl> Iterator for BitmapIter { + type Item = u16; + + fn next(&mut self) -> Option { + loop { + if self.value == 0 { + self.key += 1; + if self.key >= BITMAP_LENGTH { + return None; + } + self.value = unsafe { *self.bits.borrow().get_unchecked(self.key) }; + continue; + } + let index = self.value.trailing_zeros() as usize; + self.value &= self.value - 1; + return Some((64 * self.key + index) as u16); + } + } + + fn size_hint(&self) -> (usize, Option) { + panic!("Should never be called (roaring::Iter caches the size_hint itself)") + } +} + +#[inline] +pub fn key(index: u16) -> usize { + index as usize / 64 +} + +#[inline] +pub fn bit(index: u16) -> usize { + index as usize % 64 +} + +#[inline] +fn op_bitmaps(bits1: &mut BitmapStore, bits2: &BitmapStore, op: impl Fn(&mut u64, u64)) { + bits1.len = 0; + for (index1, &index2) in bits1.bits.iter_mut().zip(bits2.bits.iter()) { + op(index1, index2); + bits1.len += index1.count_ones() as u64; + } +} + +impl BitOrAssign<&Self> for BitmapStore { + fn bitor_assign(&mut self, rhs: &Self) { + op_bitmaps(self, rhs, 
BitOrAssign::bitor_assign); + } +} + +impl BitOrAssign<&ArrayStore> for BitmapStore { + fn bitor_assign(&mut self, rhs: &ArrayStore) { + for &index in rhs.iter() { + let (key, bit) = (key(index), bit(index)); + let old_w = self.bits[key]; + let new_w = old_w | 1 << bit; + self.len += (old_w ^ new_w) >> bit; + self.bits[key] = new_w; + } + } +} + +impl BitAndAssign<&Self> for BitmapStore { + fn bitand_assign(&mut self, rhs: &Self) { + op_bitmaps(self, rhs, BitAndAssign::bitand_assign); + } +} + +impl SubAssign<&Self> for BitmapStore { + fn sub_assign(&mut self, rhs: &Self) { + op_bitmaps(self, rhs, |l, r| *l &= !r); + } +} + +impl SubAssign<&ArrayStore> for BitmapStore { + fn sub_assign(&mut self, rhs: &ArrayStore) { + for &index in rhs.iter() { + let (key, bit) = (key(index), bit(index)); + let old_w = self.bits[key]; + let new_w = old_w & !(1 << bit); + self.len -= (old_w ^ new_w) >> bit; + self.bits[key] = new_w; + } + } +} + +impl BitXorAssign<&Self> for BitmapStore { + fn bitxor_assign(&mut self, rhs: &Self) { + op_bitmaps(self, rhs, BitXorAssign::bitxor_assign); + } +} + +impl BitXorAssign<&ArrayStore> for BitmapStore { + fn bitxor_assign(&mut self, rhs: &ArrayStore) { + let mut len = self.len as i64; + for &index in rhs.iter() { + let (key, bit) = (key(index), bit(index)); + let old_w = self.bits[key]; + let new_w = old_w ^ 1 << bit; + len += 1 - 2 * (((1 << bit) & old_w) >> bit) as i64; // +1 or -1 + self.bits[key] = new_w; + } + self.len = len as u64; + } +} diff --git a/src/bitmap/store/mod.rs b/src/bitmap/store/mod.rs new file mode 100644 index 00000000..30c805b9 --- /dev/null +++ b/src/bitmap/store/mod.rs @@ -0,0 +1,421 @@ +mod array_store; +mod bitmap_store; + +use std::mem; +use std::ops::{ + BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeInclusive, Sub, SubAssign, +}; +use std::{slice, vec}; + +use self::bitmap_store::BITMAP_LENGTH; +use self::Store::{Array, Bitmap}; + +pub use self::array_store::ArrayStore; +pub use 
self::bitmap_store::{BitmapIter, BitmapStore}; + +#[derive(Clone)] +pub enum Store { + Array(ArrayStore), + Bitmap(BitmapStore), +} + +pub enum Iter<'a> { + Array(slice::Iter<'a, u16>), + Vec(vec::IntoIter), + BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), + BitmapOwned(BitmapIter>), +} + +impl Store { + pub fn new() -> Store { + Store::Array(ArrayStore::new()) + } + + pub fn insert(&mut self, index: u16) -> bool { + match *self { + Array(ref mut vec) => vec.insert(index), + Bitmap(ref mut bits) => bits.insert(index), + } + } + + pub fn insert_range(&mut self, range: RangeInclusive) -> u64 { + // A Range is defined as being of size 0 if start >= end. + if range.is_empty() { + return 0; + } + + match *self { + Array(ref mut vec) => vec.insert_range(range), + Bitmap(ref mut bits) => bits.insert_range(range), + } + } + + /// Push `index` at the end of the store only if `index` is the new max. + /// + /// Returns whether `index` was effectively pushed. + pub fn push(&mut self, index: u16) -> bool { + match self { + Array(vec) => vec.push(index), + Bitmap(bits) => bits.push(index), + } + } + + pub fn remove(&mut self, index: u16) -> bool { + match *self { + Array(ref mut vec) => vec.remove(index), + Bitmap(ref mut bits) => bits.remove(index), + } + } + + pub fn remove_range(&mut self, range: RangeInclusive) -> u64 { + if range.is_empty() { + return 0; + } + + match *self { + Array(ref mut vec) => vec.remove_range(range), + Bitmap(ref mut bits) => bits.remove_range(range), + } + } + + pub fn contains(&self, index: u16) -> bool { + match *self { + Array(ref vec) => vec.contains(index), + Bitmap(ref bits) => bits.contains(index), + } + } + + pub fn is_disjoint(&self, other: &Self) -> bool { + match (self, other) { + (&Array(ref vec1), &Array(ref vec2)) => vec1.is_disjoint(vec2), + (&Bitmap(ref bits1), &Bitmap(ref bits2)) => bits1.is_disjoint(bits2), + (&Array(ref vec), &Bitmap(ref bits)) | (&Bitmap(ref bits), &Array(ref vec)) => { + vec.iter().all(|&i| 
!bits.contains(i))
            }
        }
    }

    /// Returns `true` if every value in `self` is also present in `other`.
    pub fn is_subset(&self, other: &Self) -> bool {
        match (self, other) {
            (&Array(ref vec1), &Array(ref vec2)) => vec1.is_subset(vec2),
            (&Bitmap(ref bits1), &Bitmap(ref bits2)) => bits1.is_subset(bits2),
            (&Array(ref vec), &Bitmap(ref bits)) => vec.iter().all(|&i| bits.contains(i)),
            // NOTE(review): this arm relies on the container invariant that a
            // Bitmap store always holds more values than any Array store can
            // (ARRAY_LIMIT), so a bitmap is never a subset of an array —
            // confirm `ensure_correct_store` keeps that invariant.
            (&Bitmap(..), &Array(..)) => false,
        }
    }

    /// Number of values in the store, delegated to the concrete representation.
    pub fn len(&self) -> u64 {
        match *self {
            Array(ref vec) => vec.len(),
            Bitmap(ref bits) => bits.len(),
        }
    }

    /// Smallest stored value, or `None` if the store is empty.
    pub fn min(&self) -> Option<u16> {
        match *self {
            Array(ref vec) => vec.min(),
            Bitmap(ref bits) => bits.min(),
        }
    }

    /// Largest stored value, or `None` if the store is empty.
    pub fn max(&self) -> Option<u16> {
        match *self {
            Array(ref vec) => vec.max(),
            Bitmap(ref bits) => bits.max(),
        }
    }
}

impl Default for Store {
    fn default() -> Self {
        Store::new()
    }
}

impl BitOr<&Store> for &Store {
    type Output = Store;

    /// Set union. Clones whichever side lets the union run in place
    /// (a bitmap absorbs an array without a representation change).
    fn bitor(self, rhs: &Store) -> Store {
        match (self, rhs) {
            (&Array(ref vec1), &Array(ref vec2)) => Array(BitOr::bitor(vec1, vec2)),
            (&Bitmap(..), &Array(..)) => {
                let mut lhs = self.clone();
                BitOrAssign::bitor_assign(&mut lhs, rhs);
                lhs
            }
            (&Bitmap(..), &Bitmap(..)) => {
                let mut lhs = self.clone();
                BitOrAssign::bitor_assign(&mut lhs, rhs);
                lhs
            }
            (&Array(..), &Bitmap(..)) => {
                // Union is commutative: clone the bitmap side and fold the
                // array into it instead of converting the array.
                let mut rhs = rhs.clone();
                BitOrAssign::bitor_assign(&mut rhs, self);
                rhs
            }
        }
    }
}

impl BitOrAssign for Store {
    /// In-place union with an owned right-hand side.
    fn bitor_assign(&mut self, mut rhs: Store) {
        match (self, &mut rhs) {
            (&mut Array(ref mut vec1), &mut Array(ref vec2)) => {
                *vec1 = BitOr::bitor(&*vec1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &mut Array(ref vec2)) => {
                BitOrAssign::bitor_assign(bits1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => {
                BitOrAssign::bitor_assign(bits1, bits2);
            }
            (this @ &mut Array(..), &mut Bitmap(..)) => {
                // Make `self` the bitmap, then recurse into the Bitmap|Array arm
                // with the (now owned) array on the right.
                mem::swap(this, &mut rhs);
                BitOrAssign::bitor_assign(this, rhs);
            }
        }
    }
}

impl BitOrAssign<&Store> for Store {
    /// In-place union with a borrowed right-hand side.
    fn bitor_assign(&mut self, rhs: &Store) {
        match (self, rhs) {
            (&mut Array(ref mut vec1), &Array(ref vec2)) => {
                // `mem::take` moves the array out so the immutable BitOr can
                // borrow it while we overwrite the slot with the result.
                let this = mem::take(vec1);
                *vec1 = BitOr::bitor(&this, vec2);
            }
            (&mut Bitmap(ref mut bits1), &Array(ref vec2)) => {
                BitOrAssign::bitor_assign(bits1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => {
                BitOrAssign::bitor_assign(bits1, bits2);
            }
            (this @ &mut Array(..), &Bitmap(ref bits2)) => {
                // Clone the bitmap, fold our array into it, and adopt it.
                let mut lhs: Store = Bitmap(bits2.clone());
                BitOrAssign::bitor_assign(&mut lhs, &*this);
                *this = lhs;
            }
        }
    }
}

impl BitAnd<&Store> for &Store {
    type Output = Store;

    /// Set intersection. Clones the side that keeps the cheaper
    /// representation (intersecting into an array never grows it).
    fn bitand(self, rhs: &Store) -> Store {
        match (self, rhs) {
            (&Array(ref vec1), &Array(ref vec2)) => Array(BitAnd::bitand(vec1, vec2)),
            (&Bitmap(..), &Array(..)) => {
                // Intersection is commutative: work on a clone of the array.
                let mut rhs = rhs.clone();
                BitAndAssign::bitand_assign(&mut rhs, self);
                rhs
            }
            _ => {
                let mut lhs = self.clone();
                BitAndAssign::bitand_assign(&mut lhs, rhs);
                lhs
            }
        }
    }
}

impl BitAndAssign for Store {
    // clippy sees `rhs` mutated before use and flags it; the swaps are the
    // whole point (operate in the smaller / cheaper representation).
    #[allow(clippy::suspicious_op_assign_impl)]
    /// In-place intersection with an owned right-hand side.
    fn bitand_assign(&mut self, mut rhs: Store) {
        match (self, &mut rhs) {
            (&mut Array(ref mut vec1), &mut Array(ref mut vec2)) => {
                // Intersect into the smaller array: fewer elements to retain.
                if vec2.len() < vec1.len() {
                    mem::swap(vec1, vec2);
                }
                BitAndAssign::bitand_assign(vec1, &*vec2);
            }
            (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => {
                BitAndAssign::bitand_assign(bits1, bits2);
            }
            (&mut Array(ref mut vec1), &mut Bitmap(ref bits2)) => {
                BitAndAssign::bitand_assign(vec1, bits2);
            }
            (this @ &mut Bitmap(..), &mut Array(..)) => {
                // Keep the array representation: swap it into `self`, then
                // recurse into the Array&Bitmap arm.
                mem::swap(this, &mut rhs);
                BitAndAssign::bitand_assign(this, rhs);
            }
        }
    }
}

impl BitAndAssign<&Store> for Store {
    #[allow(clippy::suspicious_op_assign_impl)]
    /// In-place intersection with a borrowed right-hand side.
    fn bitand_assign(&mut self, rhs: &Store) {
        match (self, rhs) {
            (&mut Array(ref mut vec1), &Array(ref vec2)) => {
                // Intersect into the smaller array; clone only when the
                // borrowed side is the smaller one.
                let (mut lhs, rhs) = if vec2.len() < vec1.len() {
                    (vec2.clone(), &*vec1)
                } else {
                    (mem::take(vec1), vec2)
                };

                BitAndAssign::bitand_assign(&mut lhs, rhs);
                *vec1 = lhs;
            }
            (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => {
                BitAndAssign::bitand_assign(bits1, bits2);
            }
            (&mut Array(ref mut vec1), &Bitmap(ref bits2)) => {
                BitAndAssign::bitand_assign(vec1, bits2);
            }
            (this @ &mut Bitmap(..), &Array(..)) => {
                // Adopt the (cloned) array representation and intersect the
                // old bitmap into it.
                let mut new = rhs.clone();
                BitAndAssign::bitand_assign(&mut new, &*this);
                *this = new;
            }
        }
    }
}

impl Sub<&Store> for &Store {
    type Output = Store;

    /// Set difference (`self - rhs`). Not commutative, so `self` is always
    /// the side that is cloned.
    fn sub(self, rhs: &Store) -> Store {
        match (self, rhs) {
            (&Array(ref vec1), &Array(ref vec2)) => Array(Sub::sub(vec1, vec2)),
            _ => {
                let mut lhs = self.clone();
                SubAssign::sub_assign(&mut lhs, rhs);
                lhs
            }
        }
    }
}

impl SubAssign<&Store> for Store {
    /// In-place difference; every pairing subtracts directly, the
    /// representation of `self` never needs to change here.
    fn sub_assign(&mut self, rhs: &Store) {
        match (self, rhs) {
            (&mut Array(ref mut vec1), &Array(ref vec2)) => {
                SubAssign::sub_assign(vec1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &Array(ref vec2)) => {
                SubAssign::sub_assign(bits1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => {
                SubAssign::sub_assign(bits1, bits2);
            }
            (&mut Array(ref mut vec1), &Bitmap(ref bits2)) => {
                SubAssign::sub_assign(vec1, bits2);
            }
        }
    }
}

impl BitXor<&Store> for &Store {
    type Output = Store;

    /// Symmetric difference. XOR is commutative, so the bitmap side is
    /// cloned whichever operand it is.
    fn bitxor(self, rhs: &Store) -> Store {
        match (self, rhs) {
            (&Array(ref vec1), &Array(ref vec2)) => Array(BitXor::bitxor(vec1, vec2)),
            (&Array(..), &Bitmap(..)) => {
                let mut lhs = rhs.clone();
                BitXorAssign::bitxor_assign(&mut lhs, self);
                lhs
            }
            _ => {
                let mut lhs = self.clone();
                BitXorAssign::bitxor_assign(&mut lhs, rhs);
                lhs
            }
        }
    }
}

impl BitXorAssign for Store {
    /// In-place symmetric difference with an owned right-hand side.
    fn bitxor_assign(&mut self, mut rhs: Store) {
        // TODO improve this function
        match (self, &mut rhs) {
            (&mut Array(ref mut vec1), &mut Array(ref mut vec2)) => {
                BitXorAssign::bitxor_assign(vec1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &mut Array(ref mut vec2)) => {
                BitXorAssign::bitxor_assign(bits1, &*vec2);
            }
            (&mut Bitmap(ref mut bits1), &mut Bitmap(ref bits2)) => {
                BitXorAssign::bitxor_assign(bits1, bits2);
            }
            (this @ &mut Array(..), &mut Bitmap(..)) => {
                // XOR is commutative: make `self` the bitmap and recurse.
                mem::swap(this, &mut rhs);
                BitXorAssign::bitxor_assign(this, rhs);
            }
        }
    }
}

impl BitXorAssign<&Store> for Store {
    /// In-place symmetric difference with a borrowed right-hand side.
    fn bitxor_assign(&mut self, rhs: &Store) {
        match (self, rhs) {
            (&mut Array(ref mut vec1), &Array(ref vec2)) => {
                BitXorAssign::bitxor_assign(vec1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &Array(ref vec2)) => {
                BitXorAssign::bitxor_assign(bits1, vec2);
            }
            (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => {
                BitXorAssign::bitxor_assign(bits1, bits2);
            }
            (this @ &mut Array(..), &Bitmap(..)) => {
                // Clone the bitmap, XOR our array into it, and adopt it.
                let mut new = rhs.clone();
                BitXorAssign::bitxor_assign(&mut new, &*this);
                *this = new;
            }
        }
    }
}

impl<'a> IntoIterator for &'a Store {
    type Item = u16;
    type IntoIter = Iter<'a>;

    /// Borrowing iterator over the stored values, in ascending order.
    fn into_iter(self) -> Iter<'a> {
        match *self {
            Array(ref vec) => Iter::Array(vec.iter()),
            Bitmap(ref bits) => Iter::BitmapBorrowed(bits.iter()),
        }
    }
}

impl IntoIterator for Store {
    type Item = u16;
    type IntoIter = Iter<'static>;

    /// Consuming iterator over the stored values, in ascending order.
    fn into_iter(self) -> Iter<'static> {
        match self {
            Array(vec) => Iter::Vec(vec.into_iter()),
            Bitmap(bits) => Iter::BitmapOwned(bits.into_iter()),
        }
    }
}

impl PartialEq for Store {
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (&Array(ref vec1), &Array(ref vec2)) => vec1 == vec2,
            (&Bitmap(ref bits1), &Bitmap(ref bits2)) => {
                // Same cardinality plus identical ascending iteration order
                // implies identical contents.
                bits1.len() == bits2.len()
                    && bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2)
            }
            // NOTE(review): cross-representation stores are treated as unequal;
            // this assumes ensure_correct_store keeps representations canonical
            // for a given cardinality — confirm callers never compare a
            // non-canonical pair.
            _ => false,
        }
    }
}

impl<'a> Iterator for Iter<'a> {
    type Item = u16;

    fn next(&mut self) -> Option<u16> {
        match *self {
            Iter::Array(ref mut inner) => inner.next().cloned(),
            Iter::Vec(ref mut inner) => inner.next(),
            Iter::BitmapBorrowed(ref mut inner) => inner.next(),
            Iter::BitmapOwned(ref mut inner) => inner.next(),
        }
    }

    // Deliberately unimplemented: the public roaring::Iter wrapper computes
    // and caches the size hint itself, so this must never be reached.
    fn size_hint(&self) -> (usize, Option<usize>) {
        panic!("Should never be called (roaring::Iter caches the size_hint itself)")
    }
}