Skip to content

Commit

Permalink
Auto merge of #98004 - paolobarbolini:vecdeque-extend-trustedlen, r=t…
Browse files Browse the repository at this point in the history
…he8472

Add VecDeque::extend from TrustedLen specialization

Continuation of #95904

Inspired by how [`VecDeque::copy_slice` works](https://github.com/rust-lang/rust/blob/c08b235a5ce10167632bb0fddcd0c5d67f2d42e3/library/alloc/src/collections/vec_deque/mod.rs#L437-L454).

## Benchmarks

Before

```
test vec_deque::bench_extend_chained_bytes      ... bench:       1,026 ns/iter (+/- 17)
test vec_deque::bench_extend_chained_trustedlen ... bench:       1,024 ns/iter (+/- 40)
test vec_deque::bench_extend_trustedlen         ... bench:         637 ns/iter (+/- 693)
```

After

```
test vec_deque::bench_extend_chained_bytes      ... bench:         828 ns/iter (+/- 24)
test vec_deque::bench_extend_chained_trustedlen ... bench:          25 ns/iter (+/- 1)
test vec_deque::bench_extend_trustedlen         ... bench:          21 ns/iter (+/- 0)
```

## Why do it this way

https://rust.godbolt.org/z/15qY1fMYh

The Compiler Explorer example shows how "just" removing the capacity check, like the [`Vec` `TrustedLen` specialization](https://github.com/rust-lang/rust/blob/c08b235a5ce10167632bb0fddcd0c5d67f2d42e3/library/alloc/src/vec/spec_extend.rs#L22-L58) does, wouldn't have been enough for `VecDeque`. `wrap_add` would still have greatly limited what LLVM could do while optimizing.

---

r? `@the8472`
  • Loading branch information
bors committed Jun 18, 2022
2 parents cdcc53b + ce3b6f5 commit 2cec687
Show file tree
Hide file tree
Showing 8 changed files with 218 additions and 3 deletions.
32 changes: 32 additions & 0 deletions library/alloc/benches/vec_deque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,35 @@ fn bench_extend_vec(b: &mut Bencher) {
ring.extend(black_box(input));
});
}

#[bench]
fn bench_extend_trustedlen(b: &mut Bencher) {
let mut ring: VecDeque<u16> = VecDeque::with_capacity(1000);

b.iter(|| {
ring.clear();
ring.extend(black_box(0..512));
});
}

#[bench]
fn bench_extend_chained_trustedlen(b: &mut Bencher) {
let mut ring: VecDeque<u16> = VecDeque::with_capacity(1000);

b.iter(|| {
ring.clear();
ring.extend(black_box((0..256).chain(768..1024)));
});
}

#[bench]
fn bench_extend_chained_bytes(b: &mut Bencher) {
let mut ring: VecDeque<u16> = VecDeque::with_capacity(1000);
let input1: &[u16] = &[128; 256];
let input2: &[u16] = &[255; 256];

b.iter(|| {
ring.clear();
ring.extend(black_box(input1.iter().chain(input2.iter())));
});
}
19 changes: 19 additions & 0 deletions library/alloc/src/collections/vec_deque/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,25 @@ impl<T, A: Allocator> VecDeque<T, A> {
}
}

/// Writes all values from `iter` to `dst`.
///
/// # Safety
///
/// Assumes no wrapping around happens.
/// Assumes capacity is sufficient.
#[inline]
unsafe fn write_iter(
&mut self,
dst: usize,
iter: impl Iterator<Item = T>,
written: &mut usize,
) {
iter.enumerate().for_each(|(i, element)| unsafe {
self.buffer_write(dst + i, element);
*written += 1;
});
}

/// Frobs the head and tail sections around to handle the fact that we
/// just reallocated. Unsafe because it trusts old_capacity.
#[inline]
Expand Down
59 changes: 59 additions & 0 deletions library/alloc/src/collections/vec_deque/spec_extend.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::alloc::Allocator;
use crate::vec;
use core::iter::{ByRefSized, TrustedLen};
use core::slice;

use super::VecDeque;
Expand Down Expand Up @@ -34,6 +35,64 @@ where
}
}

impl<T, I, A: Allocator> SpecExtend<T, I> for VecDeque<T, A>
where
I: TrustedLen<Item = T>,
{
default fn spec_extend(&mut self, mut iter: I) {
// This is the case for a TrustedLen iterator.
let (low, high) = iter.size_hint();
if let Some(additional) = high {
debug_assert_eq!(
low,
additional,
"TrustedLen iterator's size hint is not exact: {:?}",
(low, high)
);
self.reserve(additional);

struct WrapAddOnDrop<'a, T, A: Allocator> {
vec_deque: &'a mut VecDeque<T, A>,
written: usize,
}

impl<'a, T, A: Allocator> Drop for WrapAddOnDrop<'a, T, A> {
fn drop(&mut self) {
self.vec_deque.head =
self.vec_deque.wrap_add(self.vec_deque.head, self.written);
}
}

let mut wrapper = WrapAddOnDrop { vec_deque: self, written: 0 };

let head_room = wrapper.vec_deque.cap() - wrapper.vec_deque.head;
unsafe {
wrapper.vec_deque.write_iter(
wrapper.vec_deque.head,
ByRefSized(&mut iter).take(head_room),
&mut wrapper.written,
);

if additional > head_room {
wrapper.vec_deque.write_iter(0, iter, &mut wrapper.written);
}
}

debug_assert_eq!(
additional, wrapper.written,
"The number of items written to VecDeque doesn't match the TrustedLen size hint"
);
} else {
// Per TrustedLen contract a `None` upper bound means that the iterator length
// truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
// Since the other branch already panics eagerly (via `reserve()`) we do the same here.
// This avoids additional codegen for a fallback code path which would eventually
// panic anyway.
panic!("capacity overflow");
}
}
}

impl<T, A: Allocator> SpecExtend<T, vec::IntoIter<T>> for VecDeque<T, A> {
fn spec_extend(&mut self, mut iterator: vec::IntoIter<T>) {
let slice = iterator.as_slice();
Expand Down
97 changes: 97 additions & 0 deletions library/alloc/src/collections/vec_deque/tests.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use core::iter::TrustedLen;

use super::*;

#[bench]
Expand Down Expand Up @@ -791,6 +793,101 @@ fn test_from_vec() {
assert_eq!(vd.len(), vec.len());
}

#[test]
fn test_extend_basic() {
test_extend_impl(false);
}

#[test]
fn test_extend_trusted_len() {
test_extend_impl(true);
}

fn test_extend_impl(trusted_len: bool) {
struct VecDequeTester {
test: VecDeque<usize>,
expected: VecDeque<usize>,
trusted_len: bool,
}

impl VecDequeTester {
fn new(trusted_len: bool) -> Self {
Self { test: VecDeque::new(), expected: VecDeque::new(), trusted_len }
}

fn test_extend<I>(&mut self, iter: I)
where
I: Iterator<Item = usize> + TrustedLen + Clone,
{
struct BasicIterator<I>(I);
impl<I> Iterator for BasicIterator<I>
where
I: Iterator<Item = usize>,
{
type Item = usize;

fn next(&mut self) -> Option<Self::Item> {
self.0.next()
}
}

if self.trusted_len {
self.test.extend(iter.clone());
} else {
self.test.extend(BasicIterator(iter.clone()));
}

for item in iter {
self.expected.push_back(item)
}

assert_eq!(self.test, self.expected);
let (a1, b1) = self.test.as_slices();
let (a2, b2) = self.expected.as_slices();
assert_eq!(a1, a2);
assert_eq!(b1, b2);
}

fn drain<R: RangeBounds<usize> + Clone>(&mut self, range: R) {
self.test.drain(range.clone());
self.expected.drain(range);

assert_eq!(self.test, self.expected);
}

fn clear(&mut self) {
self.test.clear();
self.expected.clear();
}

fn remaining_capacity(&self) -> usize {
self.test.capacity() - self.test.len()
}
}

let mut tester = VecDequeTester::new(trusted_len);

// Initial capacity
tester.test_extend(0..tester.remaining_capacity() - 1);

// Grow
tester.test_extend(1024..2048);

// Wrap around
tester.drain(..128);

tester.test_extend(0..tester.remaining_capacity() - 1);

// Continue
tester.drain(256..);
tester.test_extend(4096..8196);

tester.clear();

// Start again
tester.test_extend(0..32);
}

#[test]
#[should_panic = "capacity overflow"]
fn test_from_vec_zst_overflow() {
Expand Down
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
#![feature(unchecked_math)]
#![feature(unicode_internals)]
#![feature(unsize)]
#![feature(std_internals)]
//
// Language features:
#![feature(allocator_internals)]
Expand Down
6 changes: 5 additions & 1 deletion library/core/src/iter/adapters/by_ref_sized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ use crate::ops::Try;
///
/// Ideally this will no longer be required, eventually, but as can be seen in
/// the benchmarks (as of Feb 2022 at least) `by_ref` can have performance cost.
pub(crate) struct ByRefSized<'a, I>(pub &'a mut I);
#[unstable(feature = "std_internals", issue = "none")]
#[derive(Debug)]
pub struct ByRefSized<'a, I>(pub &'a mut I);

#[unstable(feature = "std_internals", issue = "none")]
impl<I: Iterator> Iterator for ByRefSized<'_, I> {
type Item = I::Item;

Expand Down Expand Up @@ -47,6 +50,7 @@ impl<I: Iterator> Iterator for ByRefSized<'_, I> {
}
}

#[unstable(feature = "std_internals", issue = "none")]
impl<I: DoubleEndedIterator> DoubleEndedIterator for ByRefSized<'_, I> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
Expand Down
3 changes: 2 additions & 1 deletion library/core/src/iter/adapters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ pub use self::{
scan::Scan, skip::Skip, skip_while::SkipWhile, take::Take, take_while::TakeWhile, zip::Zip,
};

pub(crate) use self::by_ref_sized::ByRefSized;
#[unstable(feature = "std_internals", issue = "none")]
pub use self::by_ref_sized::ByRefSized;

#[stable(feature = "iter_cloned", since = "1.1.0")]
pub use self::cloned::Cloned;
Expand Down
4 changes: 3 additions & 1 deletion library/core/src/iter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,8 @@ pub use self::traits::{

#[stable(feature = "iter_zip", since = "1.59.0")]
pub use self::adapters::zip;
#[unstable(feature = "std_internals", issue = "none")]
pub use self::adapters::ByRefSized;
#[stable(feature = "iter_cloned", since = "1.1.0")]
pub use self::adapters::Cloned;
#[stable(feature = "iter_copied", since = "1.36.0")]
Expand All @@ -422,7 +424,7 @@ pub use self::adapters::{
#[unstable(feature = "iter_intersperse", reason = "recently added", issue = "79524")]
pub use self::adapters::{Intersperse, IntersperseWith};

pub(crate) use self::adapters::{try_process, ByRefSized};
pub(crate) use self::adapters::try_process;

mod adapters;
mod range;
Expand Down

0 comments on commit 2cec687

Please sign in to comment.