From 9aa7dd1e6afc0f8c944c63458fba0ea19ae2c392 Mon Sep 17 00:00:00 2001
From: Josh Stone
Date: Fri, 8 Jan 2021 13:02:23 -0800
Subject: [PATCH 1/4] Specialize Box clones to try to avoid locals

For generic `T: Clone`, we can allocate an uninitialized box beforehand,
which gives the optimizer a chance to create the clone directly in the
heap. For `T: Copy`, we can go further and do a simple memory copy,
regardless of optimization level.
---
 library/alloc/src/boxed.rs | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/library/alloc/src/boxed.rs b/library/alloc/src/boxed.rs
index 33b812ec59ff9..7a9cf1948ea0c 100644
--- a/library/alloc/src/boxed.rs
+++ b/library/alloc/src/boxed.rs
@@ -1014,10 +1014,14 @@ impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> {
     /// // But they are unique objects
     /// assert_ne!(&*x as *const i32, &*y as *const i32);
     /// ```
-    #[rustfmt::skip]
     #[inline]
     fn clone(&self) -> Self {
-        Self::new_in((**self).clone(), self.1.clone())
+        // Pre-allocate memory to allow writing the cloned value directly.
+        let mut boxed = Self::new_uninit_in(self.1.clone());
+        unsafe {
+            (**self).write_clone_into_raw(boxed.as_mut_ptr());
+            boxed.assume_init()
+        }
     }
 
     /// Copies `source`'s contents into `self` without creating a new allocation.
@@ -1043,6 +1047,28 @@ impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> {
     }
 }
 
+/// Specialize clones into pre-allocated, uninitialized memory.
+pub(crate) trait WriteCloneIntoRaw: Sized {
+    unsafe fn write_clone_into_raw(&self, target: *mut Self);
+}
+
+impl<T: Clone> WriteCloneIntoRaw for T {
+    #[inline]
+    default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // Having allocated *first* may allow the optimizer to create
+        // the cloned value in-place, skipping the local and move.
+        unsafe { target.write(self.clone()) };
+    }
+}
+
+impl<T: Copy> WriteCloneIntoRaw for T {
+    #[inline]
+    unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // We can always copy in-place, without ever involving a local value.
+        unsafe { target.copy_from_nonoverlapping(self, 1) };
+    }
+}
+
 #[stable(feature = "box_slice_clone", since = "1.3.0")]
 impl<T: Clone> Clone for Box<[T]> {
     fn clone(&self) -> Self {
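Note: outside of `alloc` itself, the `T: Copy` half of this trick can be
sketched on nightly Rust without specialization. This is purely
illustrative -- `clone_boxed_copy` is a hypothetical helper, not part of
this patch -- but it shows the shape the optimizer gets to see: the
destination is allocated first, and the bytes are copied straight into it.

    #![feature(new_uninit)] // `Box::new_uninit` was unstable as of this series

    fn clone_boxed_copy<T: Copy>(src: &Box<T>) -> Box<T> {
        // Allocate the destination first...
        let mut boxed = Box::<T>::new_uninit();
        unsafe {
            // ...then copy directly into it; no stack local of type `T`
            // is ever materialized, regardless of optimization level.
            boxed.as_mut_ptr().copy_from_nonoverlapping(&**src, 1);
            boxed.assume_init()
        }
    }
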
From d85df44e8d54f92a23d8734cb05d14c75697a2ca Mon Sep 17 00:00:00 2001
From: Josh Stone
Date: Fri, 8 Jan 2021 13:02:27 -0800
Subject: [PATCH 2/4] Specialize Rc/Arc::make_mut clones to try to avoid locals

As we did with `Box`, we can allocate an uninitialized `Rc` or `Arc`
beforehand, giving the optimizer a chance to skip the local value for
regular clones, or avoid any local altogether for `T: Copy`.
---
 library/alloc/src/rc.rs   | 11 +++++++++--
 library/alloc/src/sync.rs | 12 +++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
index 8183a582d337a..b267377d90a2e 100644
--- a/library/alloc/src/rc.rs
+++ b/library/alloc/src/rc.rs
@@ -265,6 +265,7 @@ use core::slice::from_raw_parts_mut;
 
 use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
 use crate::borrow::{Cow, ToOwned};
+use crate::boxed::WriteCloneIntoRaw;
 use crate::string::String;
 use crate::vec::Vec;
 
@@ -1037,8 +1038,14 @@ impl<T: Clone> Rc<T> {
     #[stable(feature = "rc_unique", since = "1.4.0")]
     pub fn make_mut(this: &mut Self) -> &mut T {
         if Rc::strong_count(this) != 1 {
-            // Gotta clone the data, there are other Rcs
-            *this = Rc::new((**this).clone())
+            // Gotta clone the data, there are other Rcs.
+            // Pre-allocate memory to allow writing the cloned value directly.
+            let mut rc = Self::new_uninit();
+            unsafe {
+                let data = Rc::get_mut_unchecked(&mut rc);
+                (**this).write_clone_into_raw(data.as_mut_ptr());
+                *this = rc.assume_init();
+            }
         } else if Rc::weak_count(this) != 0 {
             // Can just steal the data, all that's left is Weaks
             unsafe {
diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs
index 06ad621727166..deeb6941fcf8c 100644
--- a/library/alloc/src/sync.rs
+++ b/library/alloc/src/sync.rs
@@ -24,7 +24,7 @@ use core::sync::atomic::Ordering::{Acquire, Relaxed, Release, SeqCst};
 
 use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
 use crate::borrow::{Cow, ToOwned};
-use crate::boxed::Box;
+use crate::boxed::{Box, WriteCloneIntoRaw};
 use crate::rc::is_dangling;
 use crate::string::String;
 use crate::vec::Vec;
@@ -1369,8 +1369,14 @@ impl<T: Clone> Arc<T> {
         // weak count, there's no chance the ArcInner itself could be
         // deallocated.
         if this.inner().strong.compare_exchange(1, 0, Acquire, Relaxed).is_err() {
-            // Another strong pointer exists; clone
-            *this = Arc::new((**this).clone());
+            // Another strong pointer exists, so we must clone.
+            // Pre-allocate memory to allow writing the cloned value directly.
+            let mut arc = Self::new_uninit();
+            unsafe {
+                let data = Arc::get_mut_unchecked(&mut arc);
+                (**this).write_clone_into_raw(data.as_mut_ptr());
+                *this = arc.assume_init();
+            }
         } else if this.inner().weak.load(Relaxed) != 1 {
             // Relaxed suffices in the above because this is fundamentally an
             // optimization: we are always racing with weak pointers being
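Note: `make_mut` is the clone-on-write entry point being optimized here.
For context, the branch these hunks rewrite is the one taken when other
strong references exist, e.g.:

    use std::rc::Rc;

    let mut a = Rc::new([0u8; 64]);
    let b = Rc::clone(&a);       // strong_count == 2
    Rc::make_mut(&mut a)[0] = 1; // must clone; the clone is now written
                                 // directly into the new allocation
    assert_eq!(b[0], 0);         // `b` keeps the old allocation
    assert_eq!(a[0], 1);
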
From f89f30fb2cabe7883ead5777980f75088455efb6 Mon Sep 17 00:00:00 2001
From: Josh Stone
Date: Mon, 11 Jan 2021 17:32:21 -0800
Subject: [PATCH 3/4] Move directly when Rc/Arc::make_mut splits from Weak

When only other `Weak` references remain, we can directly move the data
into the new unique allocation as a plain memory copy.
---
 library/alloc/src/rc.rs   | 12 +++++++-----
 library/alloc/src/sync.rs | 15 ++++++---------
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
index b267377d90a2e..c81405d6119e1 100644
--- a/library/alloc/src/rc.rs
+++ b/library/alloc/src/rc.rs
@@ -1048,14 +1048,16 @@ impl<T: Clone> Rc<T> {
             }
         } else if Rc::weak_count(this) != 0 {
             // Can just steal the data, all that's left is Weaks
+            let mut rc = Self::new_uninit();
             unsafe {
-                let mut swap = Rc::new(ptr::read(&this.ptr.as_ref().value));
-                mem::swap(this, &mut swap);
-                swap.inner().dec_strong();
+                let data = Rc::get_mut_unchecked(&mut rc);
+                data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
+
+                this.inner().dec_strong();
                 // Remove implicit strong-weak ref (no need to craft a fake
                 // Weak here -- we know other Weaks can clean up for us)
-                swap.inner().dec_weak();
-                forget(swap);
+                this.inner().dec_weak();
+                ptr::write(this, rc.assume_init());
             }
         }
         // This unsafety is ok because we're guaranteed that the pointer
diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs
index deeb6941fcf8c..5bfcbeb82a13d 100644
--- a/library/alloc/src/sync.rs
+++ b/library/alloc/src/sync.rs
@@ -1392,17 +1392,14 @@ impl<T: Clone> Arc<T> {
 
             // Materialize our own implicit weak pointer, so that it can clean
             // up the ArcInner as needed.
-            let weak = Weak { ptr: this.ptr };
+            let _weak = Weak { ptr: this.ptr };
 
-            // mark the data itself as already deallocated
+            // Can just steal the data, all that's left is Weaks
+            let mut arc = Self::new_uninit();
             unsafe {
-                // there is no data race in the implicit write caused by `read`
-                // here (due to zeroing) because data is no longer accessed by
-                // other threads (due to there being no more strong refs at this
-                // point).
-                let mut swap = Arc::new(ptr::read(&weak.ptr.as_ref().data));
-                mem::swap(this, &mut swap);
-                mem::forget(swap);
+                let data = Arc::get_mut_unchecked(&mut arc);
+                data.as_mut_ptr().copy_from_nonoverlapping(&**this, 1);
+                ptr::write(this, arc.assume_init());
             }
         } else {
             // We were the sole reference of either kind; bump back up the
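Note: the branch changed above is the one where the strong count is 1 but
`Weak` references remain. `make_mut` then moves the value into a fresh
allocation as a plain memory copy and leaves the old allocation to the
Weaks, which can no longer upgrade:

    use std::rc::Rc;

    let mut a = Rc::new(String::from("unique"));
    let w = Rc::downgrade(&a);          // strong_count == 1, weak_count == 1
    Rc::make_mut(&mut a).push_str("!"); // no clone: the String is memcpy'd
                                        // into a new unique allocation
    assert!(w.upgrade().is_none());     // old allocation holds no value now
    assert_eq!(*a, "unique!");
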
From 1f1a3b48572961e8ab1ec00dd57c2d1e94c75348 Mon Sep 17 00:00:00 2001
From: Josh Stone
Date: Tue, 12 Jan 2021 12:24:28 -0800
Subject: [PATCH 4/4] move WriteCloneIntoRaw into alloc::alloc

---
 library/alloc/src/alloc.rs | 23 +++++++++++++++++++++++
 library/alloc/src/boxed.rs | 24 +-----------------------
 library/alloc/src/rc.rs    |  5 +++--
 library/alloc/src/sync.rs  |  6 ++++--
 4 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/library/alloc/src/alloc.rs b/library/alloc/src/alloc.rs
index 4fbcc4590f1a9..cb9daaea0001b 100644
--- a/library/alloc/src/alloc.rs
+++ b/library/alloc/src/alloc.rs
@@ -397,3 +397,26 @@ pub mod __alloc_error_handler {
         unsafe { oom_impl(layout) }
     }
 }
+
+/// Specialize clones into pre-allocated, uninitialized memory.
+/// Used by `Box::clone` and `Rc`/`Arc::make_mut`.
+pub(crate) trait WriteCloneIntoRaw: Sized {
+    unsafe fn write_clone_into_raw(&self, target: *mut Self);
+}
+
+impl<T: Clone> WriteCloneIntoRaw for T {
+    #[inline]
+    default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // Having allocated *first* may allow the optimizer to create
+        // the cloned value in-place, skipping the local and move.
+        unsafe { target.write(self.clone()) };
+    }
+}
+
+impl<T: Copy> WriteCloneIntoRaw for T {
+    #[inline]
+    unsafe fn write_clone_into_raw(&self, target: *mut Self) {
+        // We can always copy in-place, without ever involving a local value.
+        unsafe { target.copy_from_nonoverlapping(self, 1) };
+    }
+}
diff --git a/library/alloc/src/boxed.rs b/library/alloc/src/boxed.rs
index 7a9cf1948ea0c..0aa52b35ced45 100644
--- a/library/alloc/src/boxed.rs
+++ b/library/alloc/src/boxed.rs
@@ -151,7 +151,7 @@ use core::pin::Pin;
 use core::ptr::{self, Unique};
 use core::task::{Context, Poll};
 
-use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout};
+use crate::alloc::{handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw};
 use crate::borrow::Cow;
 use crate::raw_vec::RawVec;
 use crate::str::from_boxed_utf8_unchecked;
@@ -1047,28 +1047,6 @@ impl<T: Clone, A: Allocator + Clone> Clone for Box<T, A> {
     }
 }
 
-/// Specialize clones into pre-allocated, uninitialized memory.
-pub(crate) trait WriteCloneIntoRaw: Sized {
-    unsafe fn write_clone_into_raw(&self, target: *mut Self);
-}
-
-impl<T: Clone> WriteCloneIntoRaw for T {
-    #[inline]
-    default unsafe fn write_clone_into_raw(&self, target: *mut Self) {
-        // Having allocated *first* may allow the optimizer to create
-        // the cloned value in-place, skipping the local and move.
-        unsafe { target.write(self.clone()) };
-    }
-}
-
-impl<T: Copy> WriteCloneIntoRaw for T {
-    #[inline]
-    unsafe fn write_clone_into_raw(&self, target: *mut Self) {
-        // We can always copy in-place, without ever involving a local value.
-        unsafe { target.copy_from_nonoverlapping(self, 1) };
-    }
-}
-
 #[stable(feature = "box_slice_clone", since = "1.3.0")]
 impl<T: Clone> Clone for Box<[T]> {
     fn clone(&self) -> Self {
diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs
index c81405d6119e1..0973a6e362bc2 100644
--- a/library/alloc/src/rc.rs
+++ b/library/alloc/src/rc.rs
@@ -263,9 +263,10 @@ use core::pin::Pin;
 use core::ptr::{self, NonNull};
 use core::slice::from_raw_parts_mut;
 
-use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
+use crate::alloc::{
+    box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
+};
 use crate::borrow::{Cow, ToOwned};
-use crate::boxed::WriteCloneIntoRaw;
 use crate::string::String;
 use crate::vec::Vec;
 
diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs
index 5bfcbeb82a13d..05bfeccbda132 100644
--- a/library/alloc/src/sync.rs
+++ b/library/alloc/src/sync.rs
@@ -22,9 +22,11 @@ use core::slice::from_raw_parts_mut;
 use core::sync::atomic;
 use core::sync::atomic::Ordering::{Acquire, Relaxed, Release, SeqCst};
 
-use crate::alloc::{box_free, handle_alloc_error, AllocError, Allocator, Global, Layout};
+use crate::alloc::{
+    box_free, handle_alloc_error, AllocError, Allocator, Global, Layout, WriteCloneIntoRaw,
+};
 use crate::borrow::{Cow, ToOwned};
-use crate::boxed::{Box, WriteCloneIntoRaw};
+use crate::boxed::Box;
 use crate::rc::is_dangling;
 use crate::string::String;
 use crate::vec::Vec;
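Note: the end state of the series is that `Box::clone`, `Rc::make_mut`, and
`Arc::make_mut` all share `WriteCloneIntoRaw` from `alloc::alloc`.
`Arc::make_mut` behaves just like the `Rc` version above, only with atomic
bookkeeping:

    use std::sync::Arc;

    let mut a = Arc::new(vec![1, 2, 3]);
    let b = Arc::clone(&a);
    Arc::make_mut(&mut a).push(4); // another strong ref exists, so the Vec
                                   // is cloned directly into a new ArcInner
    assert_eq!(*b, [1, 2, 3]);
    assert_eq!(*a, [1, 2, 3, 4]);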