Skip to content

Commit

Permalink
feat: make implicit free-list more controllable
Browse files Browse the repository at this point in the history
This is done by three new `Repository` methods:

* `empty_reusable_buffer()` - hook into the free-list yourself.
* `set_freelist()` - enable or initialize the free-list.
* `without_freelist()` - a builder to disable the freelist from the start.
  • Loading branch information
Byron committed Sep 23, 2024
1 parent 35c7213 commit 0cac690
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 39 deletions.
2 changes: 1 addition & 1 deletion gix/src/object/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ impl<'repo> Tree<'repo> {
I: IntoIterator<Item = P>,
P: PartialEq<BStr>,
{
let mut buf = self.repo.shared_empty_buf();
let mut buf = self.repo.empty_reusable_buffer();
buf.clear();

let mut path = path.into_iter().peekable();
Expand Down
97 changes: 97 additions & 0 deletions gix/src/repository/freelist.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use std::cell::RefCell;
use std::ops::{Deref, DerefMut};

/// A buffer that is returned to the free-list after usage.
#[derive(Clone)]
pub struct Buffer<'repo> {
/// The buffer that would be returned to the freelist of `repo`.
/// Note that buffers without capacity (i.e. without allocation) aren't returned.
pub inner: Vec<u8>,
/// The repository from whose free-list the `inner` buffer was taken, and to which it will be returned.
pub repo: &'repo crate::Repository,
}

impl From<Buffer<'_>> for Vec<u8> {
fn from(mut value: Buffer<'_>) -> Self {
std::mem::take(&mut value.inner)
}
}

impl Deref for Buffer<'_> {
type Target = Vec<u8>;

fn deref(&self) -> &Self::Target {
&self.inner
}
}

impl DerefMut for Buffer<'_> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}

impl Drop for Buffer<'_> {
fn drop(&mut self) {
self.repo.reuse_buffer(&mut self.inner);
}
}

/// Internal
impl crate::Repository {
/// Note that the returned buffer might still have data in it.
#[inline]
pub(crate) fn free_buf(&self) -> Vec<u8> {
self.bufs
.as_ref()
.and_then(|bufs| bufs.borrow_mut().pop())
.unwrap_or_default()
}

/// This method is commonly called from the destructor of objects that previously claimed an entry
/// in the free-list with [crate::Repository::free_buf].
/// They are welcome to take out the data themselves, for instance when the object is detached, to avoid
/// it to be reclaimed.
#[inline]
pub(crate) fn reuse_buffer(&self, data: &mut Vec<u8>) {
if data.capacity() > 0 {
if let Some(bufs) = self.bufs.as_ref() {
bufs.borrow_mut().push(std::mem::take(data));
}
}
}
}

/// Freelist configuration
///
/// The free-list is an internal and 'transparent' mechanism for obtaining and re-using memory buffers when
/// reading objects. That way, trashing is avoided as buffers are re-used and re-written.
///
/// However, there are circumstances when releasing memory early is preferred, for instance on the server side.
///
/// Also note that the free-list isn't cloned, so each clone of this instance starts with an empty one.
impl crate::Repository {
/// Return an empty buffer which is tied to this repository instance, and reuse its memory allocation by
/// keeping it around even after it drops.
pub fn empty_reusable_buffer(&self) -> Buffer<'_> {
let mut inner = self.free_buf();
inner.clear();
Buffer { inner, repo: self }
}

/// Set the currently used freelist to `list`. If `None`, it will be disabled entirely.
///
/// Return the currently previously allocated free-list, a list of reusable buffers typically used when reading objects.
/// May be `None` if there was no free-list.
pub fn set_freelist(&mut self, list: Option<Vec<Vec<u8>>>) -> Option<Vec<Vec<u8>>> {
let previous = self.bufs.take();
self.bufs = list.map(RefCell::new);
previous.map(RefCell::into_inner)
}

/// A builder method to disable the free-list on a newly created instance.
pub fn without_freelist(mut self) -> Self {
self.bufs.take();
self
}
}
10 changes: 8 additions & 2 deletions gix/src/repository/impls.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
impl Clone for crate::Repository {
fn clone(&self) -> Self {
crate::Repository::from_refs_and_objects(
let mut new = crate::Repository::from_refs_and_objects(
self.refs.clone(),
self.objects.clone(),
self.work_tree.clone(),
Expand All @@ -12,7 +12,13 @@ impl Clone for crate::Repository {
self.shallow_commits.clone(),
#[cfg(feature = "attributes")]
self.modules.clone(),
)
);

if self.bufs.is_none() {
new.bufs.take();
}

new
}
}

Expand Down
2 changes: 1 addition & 1 deletion gix/src/repository/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ impl crate::Repository {
) -> Self {
setup_objects(&mut objects, &config);
crate::Repository {
bufs: RefCell::new(Vec::with_capacity(4)),
bufs: Some(RefCell::new(Vec::with_capacity(4))),
work_tree,
common_dir,
objects,
Expand Down
21 changes: 2 additions & 19 deletions gix/src/repository/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,6 @@ pub enum Kind {
},
}

/// Internal
impl crate::Repository {
#[inline]
pub(crate) fn free_buf(&self) -> Vec<u8> {
self.bufs.borrow_mut().pop().unwrap_or_default()
}

/// This method is commonly called from the destructor of objects that previously claimed an entry
/// in the free-list with `free_buf()`.
/// They are welcome to take out the data themselves, for instance when the object is detached, to avoid
/// it to be reclaimed.
#[inline]
pub(crate) fn reuse_buffer(&self, data: &mut Vec<u8>) {
if data.capacity() > 0 {
self.bufs.borrow_mut().push(std::mem::take(data));
}
}
}

#[cfg(any(feature = "attributes", feature = "excludes"))]
pub mod attributes;
mod cache;
Expand All @@ -49,6 +30,8 @@ mod dirwalk;
///
#[cfg(feature = "attributes")]
pub mod filter;
///
pub mod freelist;
mod graph;
pub(crate) mod identity;
mod impls;
Expand Down
16 changes: 2 additions & 14 deletions gix/src/repository/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,24 +158,12 @@ impl crate::Repository {

/// Write objects of any type.
impl crate::Repository {
pub(crate) fn shared_empty_buf(&self) -> std::cell::RefMut<'_, Vec<u8>> {
let mut bufs = self.bufs.borrow_mut();
if bufs.last().is_none() {
bufs.push(Vec::with_capacity(512));
}
std::cell::RefMut::map(bufs, |bufs| {
let buf = bufs.last_mut().expect("we assure one is present");
buf.clear();
buf
})
}

/// Write the given object into the object database and return its object id.
///
/// Note that we hash the object in memory to avoid storing objects that are already present. That way,
/// we avoid writing duplicate objects using slow disks that will eventually have to be garbage collected.
pub fn write_object(&self, object: impl gix_object::WriteTo) -> Result<Id<'_>, object::write::Error> {
let mut buf = self.shared_empty_buf();
let mut buf = self.empty_reusable_buffer();
object.write_to(buf.deref_mut()).expect("write to memory works");

self.write_object_inner(&buf, object.kind())
Expand Down Expand Up @@ -219,7 +207,7 @@ impl crate::Repository {
&self,
mut bytes: impl std::io::Read + std::io::Seek,
) -> Result<Id<'_>, object::write::Error> {
let mut buf = self.shared_empty_buf();
let mut buf = self.empty_reusable_buffer();
std::io::copy(&mut bytes, buf.deref_mut()).expect("write to memory works");

self.write_blob_stream_inner(&buf)
Expand Down
2 changes: 1 addition & 1 deletion gix/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ pub struct Repository {
/// The path to the resolved common directory if this is a linked worktree repository or it is otherwise set.
pub(crate) common_dir: Option<PathBuf>,
/// A free-list of reusable object backing buffers
pub(crate) bufs: RefCell<Vec<Vec<u8>>>,
pub(crate) bufs: Option<RefCell<Vec<Vec<u8>>>>,
/// A pre-assembled selection of often-accessed configuration values for quick access.
pub(crate) config: crate::config::Cache,
/// the options obtained when instantiating this repository.
Expand Down
25 changes: 24 additions & 1 deletion gix/tests/repository/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ mod write_blob {
let (_tmp, repo) = empty_bare_repo()?;
let mut cursor = std::io::Cursor::new(b"hello world");
let mut seek_cursor = cursor.clone();
let mut repo = repo.without_freelist();
let oid = repo.write_blob_stream(&mut cursor)?;
assert_eq!(oid, hex_to_id("95d09f2b10159347eece71399a7e2e907ea3df4f"));

Expand All @@ -271,13 +272,15 @@ mod write_blob {
&b"world"[..],
"the seek position is taken into account, so only part of the input data is written"
);

assert!(repo.set_freelist(None).is_none(), "previous list was already dropped");
Ok(())
}
}

#[test]
fn writes_avoid_io_using_duplicate_check() -> crate::Result {
let repo = crate::named_repo("make_packed_and_loose.sh")?;
let mut repo = crate::named_repo("make_packed_and_loose.sh")?;
let store = gix::odb::loose::Store::at(repo.git_dir().join("objects"), repo.object_hash());
let loose_count = store.iter().count();
assert_eq!(loose_count, 3, "there are some loose objects");
Expand Down Expand Up @@ -326,6 +329,26 @@ fn writes_avoid_io_using_duplicate_check() -> crate::Result {
loose_count,
"no new object was written as all of them already existed"
);

{
let buf = repo.empty_reusable_buffer();
assert!(buf.is_empty(), "the freelist buffer must be clearerd");
let mut other_buf = buf.clone();
other_buf.inner = Vec::new();
}

let freelist = repo.set_freelist(None).expect("free list is present by default");
assert_eq!(
freelist.len(),
2,
"only one object was read at a time, and one is written"
);

let mut repo_clone = repo.clone();
assert!(
repo_clone.set_freelist(None).is_none(),
"new instances inherit the free-list configuration of their parent"
);
Ok(())
}

Expand Down

0 comments on commit 0cac690

Please sign in to comment.