From 0cac69077e738cb22914e77a9a9dd3fd712d5670 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 20 Sep 2024 11:18:03 +0200 Subject: [PATCH] feat: make implicit free-list more controllable This is done by three new `Repository` methods: * `empty_reusable_buffer()` - hook into the free-list yourself. * `set_freelist()` - enable or initialize the free-list. * `without_freelist()` - a builder to disable the freelist from the start. --- gix/src/object/tree/mod.rs | 2 +- gix/src/repository/freelist.rs | 97 ++++++++++++++++++++++++++++++++++ gix/src/repository/impls.rs | 10 +++- gix/src/repository/init.rs | 2 +- gix/src/repository/mod.rs | 21 +------- gix/src/repository/object.rs | 16 +----- gix/src/types.rs | 2 +- gix/tests/repository/object.rs | 25 ++++++++- 8 files changed, 136 insertions(+), 39 deletions(-) create mode 100644 gix/src/repository/freelist.rs diff --git a/gix/src/object/tree/mod.rs b/gix/src/object/tree/mod.rs index 65906c42fda..647030fa769 100644 --- a/gix/src/object/tree/mod.rs +++ b/gix/src/object/tree/mod.rs @@ -61,7 +61,7 @@ impl<'repo> Tree<'repo> { I: IntoIterator, P: PartialEq, { - let mut buf = self.repo.shared_empty_buf(); + let mut buf = self.repo.empty_reusable_buffer(); buf.clear(); let mut path = path.into_iter().peekable(); diff --git a/gix/src/repository/freelist.rs b/gix/src/repository/freelist.rs new file mode 100644 index 00000000000..0ee5bf63f13 --- /dev/null +++ b/gix/src/repository/freelist.rs @@ -0,0 +1,97 @@ +use std::cell::RefCell; +use std::ops::{Deref, DerefMut}; + +/// A buffer that is returned to the free-list after usage. +#[derive(Clone)] +pub struct Buffer<'repo> { + /// The buffer that would be returned to the freelist of `repo`. + /// Note that buffers without capacity (i.e. without allocation) aren't returned. + pub inner: Vec, + /// The repository from whose free-list the `inner` buffer was taken, and to which it will be returned. + pub repo: &'repo crate::Repository, +} + +impl From> for Vec { + fn from(mut value: Buffer<'_>) -> Self { + std::mem::take(&mut value.inner) + } +} + +impl Deref for Buffer<'_> { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for Buffer<'_> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl Drop for Buffer<'_> { + fn drop(&mut self) { + self.repo.reuse_buffer(&mut self.inner); + } +} + +/// Internal +impl crate::Repository { + /// Note that the returned buffer might still have data in it. + #[inline] + pub(crate) fn free_buf(&self) -> Vec { + self.bufs + .as_ref() + .and_then(|bufs| bufs.borrow_mut().pop()) + .unwrap_or_default() + } + + /// This method is commonly called from the destructor of objects that previously claimed an entry + /// in the free-list with [crate::Repository::free_buf]. + /// They are welcome to take out the data themselves, for instance when the object is detached, to avoid + /// it to be reclaimed. + #[inline] + pub(crate) fn reuse_buffer(&self, data: &mut Vec) { + if data.capacity() > 0 { + if let Some(bufs) = self.bufs.as_ref() { + bufs.borrow_mut().push(std::mem::take(data)); + } + } + } +} + +/// Freelist configuration +/// +/// The free-list is an internal and 'transparent' mechanism for obtaining and re-using memory buffers when +/// reading objects. That way, trashing is avoided as buffers are re-used and re-written. +/// +/// However, there are circumstances when releasing memory early is preferred, for instance on the server side. +/// +/// Also note that the free-list isn't cloned, so each clone of this instance starts with an empty one. +impl crate::Repository { + /// Return an empty buffer which is tied to this repository instance, and reuse its memory allocation by + /// keeping it around even after it drops. + pub fn empty_reusable_buffer(&self) -> Buffer<'_> { + let mut inner = self.free_buf(); + inner.clear(); + Buffer { inner, repo: self } + } + + /// Set the currently used freelist to `list`. If `None`, it will be disabled entirely. + /// + /// Return the currently previously allocated free-list, a list of reusable buffers typically used when reading objects. + /// May be `None` if there was no free-list. + pub fn set_freelist(&mut self, list: Option>>) -> Option>> { + let previous = self.bufs.take(); + self.bufs = list.map(RefCell::new); + previous.map(RefCell::into_inner) + } + + /// A builder method to disable the free-list on a newly created instance. + pub fn without_freelist(mut self) -> Self { + self.bufs.take(); + self + } +} diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index 4b7fbf57d5b..1dbf21fa2e0 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -1,6 +1,6 @@ impl Clone for crate::Repository { fn clone(&self) -> Self { - crate::Repository::from_refs_and_objects( + let mut new = crate::Repository::from_refs_and_objects( self.refs.clone(), self.objects.clone(), self.work_tree.clone(), @@ -12,7 +12,13 @@ impl Clone for crate::Repository { self.shallow_commits.clone(), #[cfg(feature = "attributes")] self.modules.clone(), - ) + ); + + if self.bufs.is_none() { + new.bufs.take(); + } + + new } } diff --git a/gix/src/repository/init.rs b/gix/src/repository/init.rs index 1e7e6ecfdd8..3832dfba39a 100644 --- a/gix/src/repository/init.rs +++ b/gix/src/repository/init.rs @@ -15,7 +15,7 @@ impl crate::Repository { ) -> Self { setup_objects(&mut objects, &config); crate::Repository { - bufs: RefCell::new(Vec::with_capacity(4)), + bufs: Some(RefCell::new(Vec::with_capacity(4))), work_tree, common_dir, objects, diff --git a/gix/src/repository/mod.rs b/gix/src/repository/mod.rs index 41b2aaeca52..696b18f458c 100644 --- a/gix/src/repository/mod.rs +++ b/gix/src/repository/mod.rs @@ -17,25 +17,6 @@ pub enum Kind { }, } -/// Internal -impl crate::Repository { - #[inline] - pub(crate) fn free_buf(&self) -> Vec { - self.bufs.borrow_mut().pop().unwrap_or_default() - } - - /// This method is commonly called from the destructor of objects that previously claimed an entry - /// in the free-list with `free_buf()`. - /// They are welcome to take out the data themselves, for instance when the object is detached, to avoid - /// it to be reclaimed. - #[inline] - pub(crate) fn reuse_buffer(&self, data: &mut Vec) { - if data.capacity() > 0 { - self.bufs.borrow_mut().push(std::mem::take(data)); - } - } -} - #[cfg(any(feature = "attributes", feature = "excludes"))] pub mod attributes; mod cache; @@ -49,6 +30,8 @@ mod dirwalk; /// #[cfg(feature = "attributes")] pub mod filter; +/// +pub mod freelist; mod graph; pub(crate) mod identity; mod impls; diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index 7fe3ded853f..dcb999aa763 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -158,24 +158,12 @@ impl crate::Repository { /// Write objects of any type. impl crate::Repository { - pub(crate) fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec> { - let mut bufs = self.bufs.borrow_mut(); - if bufs.last().is_none() { - bufs.push(Vec::with_capacity(512)); - } - std::cell::RefMut::map(bufs, |bufs| { - let buf = bufs.last_mut().expect("we assure one is present"); - buf.clear(); - buf - }) - } - /// Write the given object into the object database and return its object id. /// /// Note that we hash the object in memory to avoid storing objects that are already present. That way, /// we avoid writing duplicate objects using slow disks that will eventually have to be garbage collected. pub fn write_object(&self, object: impl gix_object::WriteTo) -> Result, object::write::Error> { - let mut buf = self.shared_empty_buf(); + let mut buf = self.empty_reusable_buffer(); object.write_to(buf.deref_mut()).expect("write to memory works"); self.write_object_inner(&buf, object.kind()) @@ -219,7 +207,7 @@ impl crate::Repository { &self, mut bytes: impl std::io::Read + std::io::Seek, ) -> Result, object::write::Error> { - let mut buf = self.shared_empty_buf(); + let mut buf = self.empty_reusable_buffer(); std::io::copy(&mut bytes, buf.deref_mut()).expect("write to memory works"); self.write_blob_stream_inner(&buf) diff --git a/gix/src/types.rs b/gix/src/types.rs index fa03f601e95..20d240e99bb 100644 --- a/gix/src/types.rs +++ b/gix/src/types.rs @@ -151,7 +151,7 @@ pub struct Repository { /// The path to the resolved common directory if this is a linked worktree repository or it is otherwise set. pub(crate) common_dir: Option, /// A free-list of reusable object backing buffers - pub(crate) bufs: RefCell>>, + pub(crate) bufs: Option>>>, /// A pre-assembled selection of often-accessed configuration values for quick access. pub(crate) config: crate::config::Cache, /// the options obtained when instantiating this repository. diff --git a/gix/tests/repository/object.rs b/gix/tests/repository/object.rs index 27993e23b4b..a4979900073 100644 --- a/gix/tests/repository/object.rs +++ b/gix/tests/repository/object.rs @@ -255,6 +255,7 @@ mod write_blob { let (_tmp, repo) = empty_bare_repo()?; let mut cursor = std::io::Cursor::new(b"hello world"); let mut seek_cursor = cursor.clone(); + let mut repo = repo.without_freelist(); let oid = repo.write_blob_stream(&mut cursor)?; assert_eq!(oid, hex_to_id("95d09f2b10159347eece71399a7e2e907ea3df4f")); @@ -271,13 +272,15 @@ mod write_blob { &b"world"[..], "the seek position is taken into account, so only part of the input data is written" ); + + assert!(repo.set_freelist(None).is_none(), "previous list was already dropped"); Ok(()) } } #[test] fn writes_avoid_io_using_duplicate_check() -> crate::Result { - let repo = crate::named_repo("make_packed_and_loose.sh")?; + let mut repo = crate::named_repo("make_packed_and_loose.sh")?; let store = gix::odb::loose::Store::at(repo.git_dir().join("objects"), repo.object_hash()); let loose_count = store.iter().count(); assert_eq!(loose_count, 3, "there are some loose objects"); @@ -326,6 +329,26 @@ fn writes_avoid_io_using_duplicate_check() -> crate::Result { loose_count, "no new object was written as all of them already existed" ); + + { + let buf = repo.empty_reusable_buffer(); + assert!(buf.is_empty(), "the freelist buffer must be clearerd"); + let mut other_buf = buf.clone(); + other_buf.inner = Vec::new(); + } + + let freelist = repo.set_freelist(None).expect("free list is present by default"); + assert_eq!( + freelist.len(), + 2, + "only one object was read at a time, and one is written" + ); + + let mut repo_clone = repo.clone(); + assert!( + repo_clone.set_freelist(None).is_none(), + "new instances inherit the free-list configuration of their parent" + ); Ok(()) }