Skip to content

Commit

Permalink
Auto merge of #549 - the-mikedavis:md/hash-table-iter-hash, r=Amanieu
Browse files Browse the repository at this point in the history
Add `HashTable::iter_hash`, `HashTable::iter_hash_mut`

This is a follow-up to #546 (see the comment on that PR). `iter_hash` from the old raw API can be useful for reading from a "bag" / "multi map" type which allows duplicate key-value pairs. Exposing it safely in `HashTable` takes a fairly small wrapper around `RawIterHash`. This PR partially reverts #546 to restore `RawTable::iter_hash` and its associated types.
  • Loading branch information
bors committed Sep 2, 2024
2 parents aa1411b + 8b60594 commit a69af93
Show file tree
Hide file tree
Showing 2 changed files with 273 additions and 1 deletion.
130 changes: 130 additions & 0 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,20 @@ impl<T, A: Allocator> RawTable<T, A> {
self.table.iter()
}

/// Returns an iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may
/// return items whose hash value differs from the one provided. You should
/// always validate the returned values before using them.
///
/// # Safety
///
/// The returned `RawIterHash` is not bound by a lifetime, so it is up to the
/// caller to ensure that the `RawTable` outlives the `RawIterHash`. Because
/// we cannot make the `next` method unsafe on the `RawIterHash` struct, we
/// have to make the `iter_hash` method unsafe instead. See the `RawIterHash`
/// documentation for the rules that apply while the iterator is in use.
#[cfg_attr(feature = "inline-more", inline)]
pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<T> {
RawIterHash::new(self, hash)
}

/// Returns an iterator which removes all elements from the table without
/// freeing the memory.
#[cfg_attr(feature = "inline-more", inline)]
Expand Down Expand Up @@ -3996,6 +4010,122 @@ impl<T, A: Allocator> Iterator for RawDrain<'_, T, A> {
// Marker impls: declare that `RawDrain` reports an exact length and keeps
// returning `None` once it has returned `None`.
impl<T, A: Allocator> ExactSizeIterator for RawDrain<'_, T, A> {}
impl<T, A: Allocator> FusedIterator for RawDrain<'_, T, A> {}

/// Iterator over occupied buckets that could match a given hash.
///
/// `RawTable` only stores 7 bits of the hash value, so this iterator may return
/// items whose hash value differs from the one provided. You should always
/// validate the returned values before using them.
///
/// For maximum flexibility this iterator is not bound by a lifetime, but you
/// must observe several rules when using it:
/// - You must not free the hash table while iterating (including via growing/shrinking).
/// - It is fine to erase a bucket that has been yielded by the iterator.
/// - Erasing a bucket that has not yet been yielded by the iterator may still
///   result in the iterator yielding that bucket.
/// - It is unspecified whether an element inserted after the iterator was
///   created will be yielded by that iterator.
/// - The order in which the iterator yields buckets is unspecified and may
///   change in the future.
pub struct RawIterHash<T> {
// Type-erased probing state; it yields bucket indices, which `next` turns
// into `Bucket<T>` values.
inner: RawIterHashInner,
// Records the element type `T` of the yielded buckets without storing a `T`.
_marker: PhantomData<T>,
}

// Element-type-independent state of a `RawIterHash`: a snapshot of the
// table's control-byte location plus the current position in the probe
// sequence for one hash. Iterating it yields bucket indices.
struct RawIterHashInner {
// See `RawTableInner`'s corresponding fields for details.
// We can't store a `*const RawTableInner` as it would get
// invalidated by the user calling `&mut` methods on `RawTable`.
bucket_mask: usize,
ctrl: NonNull<u8>,

// The top 7 bits of the hash.
h2_hash: u8,

// The sequence of groups to probe in the search.
probe_seq: ProbeSeq,

// The group of control bytes most recently loaded at `probe_seq.pos`.
// Kept so that `next` can check it for empty slots once `bitmask` runs out.
group: Group,

// The elements within the group with a matching h2-hash.
bitmask: BitMaskIter,
}

impl<T> RawIterHash<T> {
    /// Creates the typed iterator for `hash` by wrapping the type-erased
    /// probing state built from the table's inner storage.
    ///
    /// This is `unsafe` because it forwards to the unsafe
    /// `RawIterHashInner::new`, and the value returned is not tied to the
    /// lifetime of `table`.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new<A: Allocator>(table: &RawTable<T, A>, hash: u64) -> Self {
        let inner = RawIterHashInner::new(&table.table, hash);
        Self {
            inner,
            _marker: PhantomData,
        }
    }
}

impl RawIterHashInner {
    /// Captures the probing state for `hash` over `table`.
    ///
    /// The first group of the probe sequence is loaded eagerly, together with
    /// the set of positions in it whose control byte equals the h2 tag of
    /// `hash`, so that `next` can start yielding matches immediately.
    ///
    /// Only `bucket_mask` and the control pointer are copied out of `table`;
    /// no reference to it is retained.
    #[cfg_attr(feature = "inline-more", inline)]
    unsafe fn new(table: &RawTableInner, hash: u64) -> Self {
        let tag = h2(hash);
        let start = table.probe_seq(hash);
        let first_group = Group::load(table.ctrl(start.pos));
        let first_matches = first_group.match_byte(tag).into_iter();

        Self {
            bucket_mask: table.bucket_mask,
            ctrl: table.ctrl,
            h2_hash: tag,
            probe_seq: start,
            group: first_group,
            bitmask: first_matches,
        }
    }
}

impl<T> Iterator for RawIterHash<T> {
    type Item = Bucket<T>;

    /// Advances the probe and turns the next candidate index into a bucket.
    fn next(&mut self) -> Option<Bucket<T>> {
        if let Some(index) = self.inner.next() {
            // No `RawTable` reference is available here, so `RawTable::bucket`
            // cannot be used; the bucket is rebuilt from the saved control
            // pointer instead.
            debug_assert!(index <= self.inner.bucket_mask);
            let base = self.inner.ctrl.cast();
            // SAFETY: relies on the contract of `RawTable::iter_hash`: the
            // caller keeps the table (and therefore `ctrl`) alive while this
            // iterator is in use. `index` has been masked with `bucket_mask`
            // by the inner iterator.
            Some(unsafe { Bucket::from_base_index(base, index) })
        } else {
            None
        }
    }
}

impl Iterator for RawIterHashInner {
// Each item is the index of a bucket whose control byte matched `h2_hash`.
type Item = usize;

// Walks the probe sequence one group at a time, yielding every position
// whose control byte matches `h2_hash`, and stops at the first group that
// contains an empty control byte.
fn next(&mut self) -> Option<Self::Item> {
unsafe {
loop {
// First hand out the matches still pending in the current group.
if let Some(bit) = self.bitmask.next() {
// `bit` is the offset of the match within the group; masking
// wraps the resulting index back into the table.
let index = (self.probe_seq.pos + bit) & self.bucket_mask;
return Some(index);
}
// The current group has no more matches. If it also contains an
// empty slot the search ends here (hinted as the expected case via
// `likely`). Because `group` and `bitmask` are left untouched, any
// further call takes this same path and returns `None` again.
// NOTE(review): presumably this mirrors the termination rule of a
// regular lookup — confirm against `RawTableInner`'s probing code.
if likely(self.group.match_empty().any_bit_set()) {
return None;
}
// Otherwise step to the next group of the probe sequence.
self.probe_seq.move_next(self.bucket_mask);

// Can't use `RawTableInner::ctrl` here as we don't have
// an actual `RawTableInner` reference to use.
let index = self.probe_seq.pos;
// NOTE(review): the bound suggests the control array holds
// `bucket_mask + 1 + Group::WIDTH` bytes — confirm against the
// table's allocation layout.
debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH);
let group_ctrl = self.ctrl.as_ptr().add(index);

// Load the new group and recompute which of its positions match.
self.group = Group::load(group_ctrl);
self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
}
}
}
}

pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
pub iter: RawIter<T>,
pub table: &'a mut RawTable<T, A>,
Expand Down
144 changes: 143 additions & 1 deletion src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use core::{fmt, iter::FusedIterator, marker::PhantomData};
use crate::{
raw::{
Allocator, Bucket, Global, InsertSlot, RawDrain, RawExtractIf, RawIntoIter, RawIter,
RawTable,
RawIterHash, RawTable,
},
TryReserveError,
};
Expand Down Expand Up @@ -741,6 +741,98 @@ where
}
}

/// An iterator visiting all elements which may match a hash.
/// The iterator element type is `&'a T`.
///
/// This iterator may return elements from the table that have a hash value
/// different from the one provided. You should always validate the returned
/// values before using them.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "nightly")]
/// # fn test() {
/// use hashbrown::{HashTable, DefaultHashBuilder};
/// use std::hash::BuildHasher;
///
/// let mut table = HashTable::new();
/// let hasher = DefaultHashBuilder::default();
/// let hasher = |val: &_| hasher.hash_one(val);
/// table.insert_unique(hasher(&"a"), "a", hasher);
/// table.insert_unique(hasher(&"a"), "b", hasher);
/// table.insert_unique(hasher(&"b"), "c", hasher);
///
/// // Will print "a" and "b" (and possibly "c") in an arbitrary order.
/// for x in table.iter_hash(hasher(&"a")) {
/// println!("{}", x);
/// }
/// # }
/// # fn main() {
/// # #[cfg(feature = "nightly")]
/// # test()
/// # }
/// ```
pub fn iter_hash(&self, hash: u64) -> IterHash<'_, T> {
IterHash {
// SAFETY: `RawTable::iter_hash` requires the table to outlive the raw
// iterator. The `IterHash<'_, T>` returned here holds a shared borrow of
// `self` for as long as it exists, so the table can neither be dropped
// nor mutated while the raw iterator is in use.
inner: unsafe { self.raw.iter_hash(hash) },
_marker: PhantomData,
}
}

/// A mutable iterator visiting all elements which may match a hash.
/// The iterator element type is `&'a mut T`.
///
/// This iterator may return elements from the table that have a hash value
/// different from the one provided. You should always validate the returned
/// values before using them.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "nightly")]
/// # fn test() {
/// use hashbrown::{HashTable, DefaultHashBuilder};
/// use std::hash::BuildHasher;
///
/// let mut table = HashTable::new();
/// let hasher = DefaultHashBuilder::default();
/// let hasher = |val: &_| hasher.hash_one(val);
/// table.insert_unique(hasher(&1), 2, hasher);
/// table.insert_unique(hasher(&1), 3, hasher);
/// table.insert_unique(hasher(&2), 5, hasher);
///
/// // Update matching values
/// for val in table.iter_hash_mut(hasher(&1)) {
/// *val *= 2;
/// }
///
/// assert_eq!(table.len(), 3);
/// let mut vec: Vec<i32> = Vec::new();
///
/// for val in &table {
/// println!("val: {}", val);
/// vec.push(*val);
/// }
///
/// // The values will contain 4 and 6 and may contain either 5 or 10.
/// assert!(vec.contains(&4));
/// assert!(vec.contains(&6));
///
/// assert_eq!(table.len(), 3);
/// # }
/// # fn main() {
/// # #[cfg(feature = "nightly")]
/// # test()
/// # }
/// ```
pub fn iter_hash_mut(&mut self, hash: u64) -> IterHashMut<'_, T> {
IterHashMut {
// SAFETY: `RawTable::iter_hash` requires the table to outlive the raw
// iterator. The `IterHashMut<'_, T>` returned here holds the exclusive
// borrow of `self` for as long as it exists, so nothing else can drop,
// resize or otherwise access the table while the raw iterator is in use.
inner: unsafe { self.raw.iter_hash(hash) },
_marker: PhantomData,
}
}

/// Retains only the elements specified by the predicate.
///
/// In other words, remove all elements `e` such that `f(&e)` returns `false`.
Expand Down Expand Up @@ -1932,6 +2024,56 @@ impl<T> ExactSizeIterator for IterMut<'_, T> {

// Marker impl: declares that `IterMut` keeps returning `None` once it has
// returned `None`.
impl<T> FusedIterator for IterMut<'_, T> {}

/// An iterator over the entries of a `HashTable` that could match a given hash.
/// The iterator element type is `&'a T`.
///
/// This `struct` is created by the [`iter_hash`] method on [`HashTable`]. See its
/// documentation for more.
///
/// [`iter_hash`]: struct.HashTable.html#method.iter_hash
/// [`HashTable`]: struct.HashTable.html
pub struct IterHash<'a, T> {
inner: RawIterHash<T>,
_marker: PhantomData<&'a T>,
}

impl<'a, T> Iterator for IterHash<'a, T> {
type Item = &'a T;

fn next(&mut self) -> Option<Self::Item> {
// Avoid `Option::map` because it bloats LLVM IR.
match self.inner.next() {
Some(bucket) => Some(unsafe { bucket.as_ref() }),
None => None,
}
}
}

/// A mutable iterator over the entries of a `HashTable` that could match a given hash.
/// The iterator element type is `&'a mut T`.
///
/// This `struct` is created by the [`iter_hash_mut`] method on [`HashTable`]. See its
/// documentation for more.
///
/// [`iter_hash_mut`]: struct.HashTable.html#method.iter_hash_mut
/// [`HashTable`]: struct.HashTable.html
pub struct IterHashMut<'a, T> {
inner: RawIterHash<T>,
_marker: PhantomData<&'a mut T>,
}

impl<'a, T> Iterator for IterHashMut<'a, T> {
type Item = &'a mut T;

fn next(&mut self) -> Option<Self::Item> {
// Avoid `Option::map` because it bloats LLVM IR.
match self.inner.next() {
Some(bucket) => Some(unsafe { bucket.as_mut() }),
None => None,
}
}
}

/// An owning iterator over the entries of a `HashTable` in arbitrary order.
/// The iterator element type is `T`.
///
Expand Down

0 comments on commit a69af93

Please sign in to comment.