Skip to content

Commit

Permalink
feat: add InMemoryPassThrough implementation.
Browse files Browse the repository at this point in the history
An implementation of `Header`, `Write` and `Find`, that can optionally
write everything to an in-memory store, and if enabled, also read
objects back from there.

That way it can present a consistent view of objects from two locations.
  • Loading branch information
Byron committed Sep 5, 2024
1 parent b279957 commit e738acc
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions gix-odb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"]

[dependencies]
gix-features = { version = "^0.38.2", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] }
gix-hashtable = { version = "^0.5.2", path = "../gix-hashtable" }
gix-hash = { version = "^0.14.2", path = "../gix-hash" }
gix-date = { version = "^0.9.0", path = "../gix-date" }
gix-path = { version = "^0.10.10", path = "../gix-path" }
Expand Down
3 changes: 3 additions & 0 deletions gix-odb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ pub fn sink(object_hash: gix_hash::Kind) -> Sink {
}
}

///
pub mod memory;

mod sink;

///
Expand Down
166 changes: 166 additions & 0 deletions gix-odb/src/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use crate::find::Header;
use gix_object::Data;
use std::cell::RefCell;
use std::ops::{Deref, DerefMut};

/// An object database to read from any implementation but write to memory.
/// Previously written objects can be returned from memory upon query which
/// makes the view of objects consistent, but its impact is temporary unless
/// the [memory objects](Proxy::take_object_memory()) are persisted in a separate step.
///
/// It's possible to turn off the memory by removing it from the instance.
pub struct Proxy<T> {
/// The actual odb implementation, which is consulted whenever an object isn't held in memory.
inner: T,
/// The kind of hash to produce when writing new objects.
object_hash: gix_hash::Kind,
/// The storage for in-memory objects.
/// If `None`, the proxy will always read from and write-through to `inner`.
/// It lives in a `RefCell` so that objects can be added through a shared reference.
memory: Option<RefCell<Storage>>,
}

/// Lifecycle
impl<T> Proxy<T> {
/// Create a new instance using `odb` as actual object provider, with an empty in-memory store for
/// objects that are to be written.
/// Use `object_hash` to determine the kind of hash to produce when writing new objects.
pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy<T> {
Proxy {
inner: odb,
object_hash,
memory: Some(Default::default()),
}
}
}

/// Memory Access
impl<T> Proxy<T> {
    /// Remove the in-memory storage from this instance and return it along with all objects it holds,
    /// or `None` if there was no storage to begin with.
    ///
    /// Afterwards this instance can't keep objects in memory anymore, so writes are forwarded
    /// to the underlying object database.
    ///
    /// To avoid that, use [`reset_object_memory()`](Self::reset_object_memory()) or hand the storage
    /// back with [`set_object_memory()`](Self::set_object_memory()).
    pub fn take_object_memory(&mut self) -> Option<Storage> {
        self.memory.take().map(RefCell::into_inner)
    }

    /// Make `new` the in-memory storage of this instance, and return the storage that was
    /// in place before, if there was one.
    pub fn set_object_memory(&mut self, new: Storage) -> Option<Storage> {
        self.memory.replace(RefCell::new(new)).map(RefCell::into_inner)
    }

    /// Swap the current storage for an empty one and return the previous storage with all
    /// the objects it contained.
    ///
    /// If this instance has no object memory, nothing changes and `None` is returned.
    /// In that case, set it explicitly.
    pub fn reset_object_memory(&self) -> Option<Storage> {
        let cell = self.memory.as_ref()?;
        Some(cell.take())
    }

    /// Return the number of objects currently held in memory, which is `0` if there is no object memory.
    pub fn num_objects_in_memory(&self) -> usize {
        match &self.memory {
            Some(cell) => cell.borrow().len(),
            None => 0,
        }
    }
}

impl<T> gix_object::Find for Proxy<T>
where
    T: gix_object::Find,
{
    /// Look `id` up in memory first and copy its data into `buffer` on a hit,
    /// otherwise let the underlying object database answer the query.
    fn try_find<'a>(
        &self,
        id: &gix_hash::oid,
        buffer: &'a mut Vec<u8>,
    ) -> Result<Option<Data<'a>>, gix_object::find::Error> {
        let kind_from_memory = self.memory.as_ref().and_then(|cell| {
            let objects = cell.borrow();
            objects.get(id).map(|(kind, data)| {
                // The caller's buffer is only touched once we know the object is ours.
                buffer.clear();
                buffer.extend_from_slice(data);
                *kind
            })
        });

        match kind_from_memory {
            Some(kind) => Ok(Some(Data { kind, data: &*buffer })),
            None => self.inner.try_find(id, buffer),
        }
    }
}

impl<T> crate::Header for Proxy<T>
where
    T: crate::Header,
{
    /// Answer from memory if `id` is stored there, reporting it as loose object whose size is the
    /// length of the stored data, and ask the underlying object database otherwise.
    fn try_header(&self, id: &gix_hash::oid) -> Result<Option<Header>, gix_object::find::Error> {
        let header_from_memory = self.memory.as_ref().and_then(|cell| {
            let objects = cell.borrow();
            objects.get(id).map(|(kind, data)| Header::Loose {
                kind: *kind,
                size: data.len() as u64,
            })
        });

        match header_from_memory {
            Some(header) => Ok(Some(header)),
            None => self.inner.try_header(id),
        }
    }
}

impl<T> crate::Write for Proxy<T>
where
    T: crate::Write,
{
    /// Read all of `from` and keep it in memory under its computed object id, or forward the
    /// call to the underlying object database if there is no object memory.
    ///
    /// Note that `size` is only passed along when forwarding; the in-memory path reads `from`
    /// to its end instead.
    fn write_stream(
        &self,
        kind: gix_object::Kind,
        size: u64,
        from: &mut dyn std::io::Read,
    ) -> Result<gix_hash::ObjectId, crate::write::Error> {
        match self.memory.as_ref() {
            None => self.inner.write_stream(kind, size, from),
            Some(cell) => {
                let mut bytes = Vec::new();
                from.read_to_end(&mut bytes)?;

                let id = gix_object::compute_hash(self.object_hash, kind, &bytes);
                cell.borrow_mut().insert(id, (kind, bytes));
                Ok(id)
            }
        }
    }
}

/// Provide shared access to the wrapped object database itself.
/// Anything called this way doesn't go through the in-memory storage.
impl<T> Deref for Proxy<T> {
type Target = T;

fn deref(&self) -> &Self::Target {
&self.inner
}
}

/// Provide mutable access to the wrapped object database itself.
/// Anything called this way doesn't go through the in-memory storage.
impl<T> DerefMut for Proxy<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}

/// A mapping between an object id and all data corresponding to an object, acting like a `HashMap<ObjectId, (Kind, Data)>`.
///
/// The underlying map is accessible through `Deref` and `DerefMut`.
#[derive(Default, Debug, Clone, Eq, PartialEq)]
pub struct Storage(gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>);

/// Expose the wrapped map for reading, e.g. to query its length or look up objects.
impl Deref for Storage {
type Target = gix_hashtable::HashMap<gix_hash::ObjectId, (gix_object::Kind, Vec<u8>)>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

/// Expose the wrapped map for modification, e.g. to insert objects.
impl DerefMut for Storage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
103 changes: 103 additions & 0 deletions gix-odb/tests/odb/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
use crate::odb::hex_to_id;
use gix_object::{tree, FindExt};
use gix_odb::{Header, HeaderExt, Write};
use gix_testtools::tempfile::TempDir;

/// Once the object memory was taken away, the proxy forwards reads and writes
/// to the underlying object database, and written objects can still be found through it.
#[test]
fn without_memory() -> crate::Result {
// A writable fixture is needed as the new tree is written through to the underlying database.
// `_tmp` is held until the end of the test — presumably the directory is removed when it is dropped.
let (mut odb, _tmp) = db_rw()?;
let mut buf = Vec::new();
// From here on, the proxy has no memory and passes writes through.
let mem = odb.take_object_memory().expect("it starts out with memory set");
assert_eq!(mem.len(), 0, "no object is stored initially");
let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c");
let tree = odb.find_tree(&existing, &mut buf).expect("present and valid");
assert_eq!(tree.entries.len(), 1);
odb.header(existing).expect("header can be found just the same");

// Create an object that is new to the database by adding an entry to the existing tree.
let mut tree = tree.to_owned();
tree.entries.push(tree::Entry {
mode: tree::EntryKind::Blob.into(),
filename: "z-for-sorting_another-file-with-same-content".into(),
oid: existing,
});
let new_tree_id = odb.write(&tree)?;
assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c"));
let actual = odb.header(new_tree_id).expect("header of new objects can be found");
assert_eq!(actual.kind(), gix_object::Kind::Tree);
assert_eq!(actual.size(), 104);

// The written tree must read back exactly as it was written.
let new_tree = odb
.find_tree(&new_tree_id, &mut buf)
.expect("new tree is also available as object")
.to_owned();
assert_eq!(new_tree, tree);

Ok(())
}

/// With object memory present, written objects are kept in memory only and can be read back
/// through the proxy until the memory is reset.
#[test]
fn with_memory() -> crate::Result {
let mut odb = db()?;
// `(*odb)` derefs to the underlying database to count its objects, bypassing the memory.
assert_eq!(
(*odb).iter()?.count(),
6,
"let's be sure we didn't accidentally write anything"
);
let mut buf = Vec::new();
let existing = hex_to_id("21d3ba9a26b790a4858d67754ae05d04dfce4d0c");
let tree = odb.find_tree(&existing, &mut buf).expect("present and valid");
assert_eq!(tree.entries.len(), 1);
odb.header(existing).expect("header can be found just the same");
assert_eq!(
odb.num_objects_in_memory(),
0,
"nothing is stored when fetching objects - it's not an object cache"
);

// Create an object that is new to the database by adding an entry to the existing tree.
let mut tree = tree.to_owned();
tree.entries.push(tree::Entry {
mode: tree::EntryKind::Blob.into(),
filename: "z-for-sorting_another-file-with-same-content".into(),
oid: existing,
});
let new_tree_id = odb.write(&tree)?;
assert_eq!(new_tree_id, hex_to_id("249b0b4106a5e9e7875e446a26468e22ec47a05c"));
let actual = odb.header(new_tree_id).expect("header of new objects can be found");
assert_eq!(actual.kind(), gix_object::Kind::Tree);
assert_eq!(actual.size(), 104);

// The written tree must read back exactly as it was written.
let new_tree = odb
.find_tree(&new_tree_id, &mut buf)
.expect("new tree is also available as object")
.to_owned();
assert_eq!(new_tree, tree);

// Resetting hands out the previous storage and leaves an empty one in place.
let mem = odb.reset_object_memory().expect("memory is still available");
assert_eq!(mem.len(), 1, "one new object was just written");

// This also shows that the object was never written to the underlying database.
assert_eq!(
odb.try_header(&new_tree_id)?,
None,
"without memory, the object can't be found anymore"
);

// Hand the storage back, which returns the empty one created by the reset.
let prev_mem = odb.set_object_memory(mem).expect("reset means it's just cleared");
assert_eq!(prev_mem.len(), 0, "nothing was stored after the reset");

assert_eq!(odb.num_objects_in_memory(), 1, "we put all previous objects back");

Ok(())
}

/// Open the object database of the read-only `repo_with_loose_objects.sh` fixture and wrap it
/// into a proxy that starts out with empty object memory, producing SHA1 hashes.
fn db() -> crate::Result<gix_odb::memory::Proxy<gix_odb::Handle>> {
    let fixture_dir = gix_testtools::scripted_fixture_read_only_standalone("repo_with_loose_objects.sh")?;
    let objects_dir = fixture_dir.join(".git/objects");
    let handle = gix_odb::at(objects_dir)?;
    Ok(gix_odb::memory::Proxy::new(handle, gix_hash::Kind::Sha1))
}

/// Like `db()`, but based on a writable copy of the `repo_with_loose_objects.sh` fixture.
/// The temporary directory is returned alongside the proxy so the caller can keep it for as long as needed.
fn db_rw() -> crate::Result<(gix_odb::memory::Proxy<gix_odb::Handle>, TempDir)> {
    let tmp = gix_testtools::scripted_fixture_writable_standalone("repo_with_loose_objects.sh")?;
    let objects_dir = tmp.path().join(".git/objects");
    let proxy = gix_odb::memory::Proxy::new(gix_odb::at(objects_dir)?, gix_hash::Kind::Sha1);
    Ok((proxy, tmp))
}
1 change: 1 addition & 0 deletions gix-odb/tests/odb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ fn db_small_packs() -> gix_odb::Handle {
pub mod alternate;
pub mod find;
pub mod header;
pub mod memory;
pub mod regression;
pub mod sink;
pub mod store;

0 comments on commit e738acc

Please sign in to comment.