From 748fafdffb6cb56493c316a64f3e5eb7b4b4e386 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 9 Sep 2024 11:42:28 +0200 Subject: [PATCH 1/6] add performance notes related to an arena-backed interned BString compatible type --- crate-status.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crate-status.md b/crate-status.md index b34dffa075..df1a09e9e4 100644 --- a/crate-status.md +++ b/crate-status.md @@ -196,6 +196,9 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] probe capabilities * [x] symlink creation and removal * [x] file snapshots +* [ ] **BString Interner with Arena-Backing and arbitrary value association** + - probably based on [`internment`](https://docs.rs/internment/latest/internment/struct.Arena.html#), + but needs `bumpalo` support to avoid item allocations/boxing, and avoid internal `Mutex`. (key type is pointer based). ### gix-fs * [x] probe capabilities * [x] symlink creation and removal * [x] file snapshots @@ -215,6 +218,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] [name validation][tagname-validation] * [x] transform borrowed to owned objects * [x] edit trees efficiently and write changes back + - [ ] See if `gix-fs::InternedMap` improves performance. 
* [x] API documentation * [ ] Some examples From 1d3d25884c6b4fd2d1941d983433c8aefd779898 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 30 Aug 2024 09:49:29 +0200 Subject: [PATCH 2/6] add the `gix-merge` crate for capturing merge algorithms --- Cargo.lock | 4 ++++ Cargo.toml | 1 + README.md | 5 +++-- crate-status.md | 9 +++++++++ gix-merge/Cargo.toml | 18 ++++++++++++++++++ gix-merge/LICENSE-APACHE | 1 + gix-merge/LICENSE-MIT | 1 + gix-merge/src/lib.rs | 2 ++ 8 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 gix-merge/Cargo.toml create mode 120000 gix-merge/LICENSE-APACHE create mode 120000 gix-merge/LICENSE-MIT create mode 100644 gix-merge/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ad92273b74..fbc4bc1a3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2040,6 +2040,10 @@ dependencies = [ "thiserror", ] +[[package]] +name = "gix-merge" +version = "0.0.0" + [[package]] name = "gix-negotiate" version = "0.15.0" diff --git a/Cargo.toml b/Cargo.toml index c4fe1097bc..6e5b2dfe1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -243,6 +243,7 @@ members = [ "gix-object", "gix-glob", "gix-diff", + "gix-merge", "gix-date", "gix-traverse", "gix-dir", diff --git a/README.md b/README.md index 49bbcf1150..5d5ca7e9f8 100644 --- a/README.md +++ b/README.md @@ -130,10 +130,11 @@ is usable to some extent. 
* [gix-submodule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-submodule) * [gix-status](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-status) * [gix-worktree-state](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree-state) - * `gitoxide-core` -* **very early** _(possibly without any documentation and many rough edges)_ * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) * [gix-dir](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-dir) + * `gitoxide-core` +* **very early** _(possibly without any documentation and many rough edges)_ + * [gix-merge](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-merge) * **idea** _(just a name placeholder)_ * [gix-note](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-note) * [gix-fetchhead](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-fetchhead) diff --git a/crate-status.md b/crate-status.md index df1a09e9e4..ad68671457 100644 --- a/crate-status.md +++ b/crate-status.md @@ -326,6 +326,15 @@ Check out the [performance discussion][gix-diff-performance] as well. 
* [ ] working with hunks of data * [x] API documentation * [ ] Examples + +### gix-merge + +* [ ] three-way merge analysis of blobs with choice of how to resolve conflicts + - [ ] choose how to resolve conflicts on the data-structure + - [ ] produce a new blob based on data-structure containing possible resolutions + - [ ] `merge` style + - [ ] `diff3` style + - [ ] `zdiff` style [gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74 diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml new file mode 100644 index 0000000000..2114995cf0 --- /dev/null +++ b/gix-merge/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "gix-merge" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT OR Apache-2.0" +description = "A crate of the gitoxide project implementing merge algorithms" +authors = ["Sebastian Thiel "] +edition = "2021" +rust-version = "1.65" + +[lints] +workspace = true + +[lib] +doctest = false + +[dependencies] + diff --git a/gix-merge/LICENSE-APACHE b/gix-merge/LICENSE-APACHE new file mode 120000 index 0000000000..965b606f33 --- /dev/null +++ b/gix-merge/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-merge/LICENSE-MIT b/gix-merge/LICENSE-MIT new file mode 120000 index 0000000000..76219eb72e --- /dev/null +++ b/gix-merge/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs new file mode 100644 index 0000000000..3a6cd994a5 --- /dev/null +++ b/gix-merge/src/lib.rs @@ -0,0 +1,2 @@ +#![deny(rust_2018_idioms)] +#![forbid(unsafe_code)] From ea95284de82ad9b753c9e61da6863afa72bfb2db Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 12 Sep 2024 11:06:26 +0200 Subject: [PATCH 3/6] feat: Add `blob::pipeline::WorktreeRoots::is_unset()` That way it's easy to determine if a worktree root has any root set. 
--- gix-diff/src/blob/pipeline.rs | 12 +++++++++++- gix-diff/src/blob/platform.rs | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/gix-diff/src/blob/pipeline.rs b/gix-diff/src/blob/pipeline.rs index 4501821842..b9c727e4ca 100644 --- a/gix-diff/src/blob/pipeline.rs +++ b/gix-diff/src/blob/pipeline.rs @@ -22,6 +22,7 @@ pub struct WorktreeRoots { pub new_root: Option, } +/// Access impl WorktreeRoots { /// Return the root path for the given `kind` pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { @@ -30,6 +31,11 @@ impl WorktreeRoots { ResourceKind::NewOrDestination => self.new_root.as_deref(), } } + + /// Return `true` if all worktree roots are unset. + pub fn is_unset(&self) -> bool { + self.new_root.is_none() && self.old_root.is_none() + } } /// Data as part of an [Outcome]. @@ -184,6 +190,8 @@ impl Pipeline { /// Access impl Pipeline { /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](Driver::name) to support binary searches. 
pub fn drivers(&self) -> &[super::Driver] { &self.drivers } @@ -445,7 +453,7 @@ impl Pipeline { } } .map_err(|err| { - convert_to_diffable::Error::CreateTempfile { + convert_to_diffable::Error::StreamCopy { source: err, rela_path: rela_path.to_owned(), } @@ -533,6 +541,8 @@ impl Driver { pub fn prepare_binary_to_text_cmd(&self, path: &Path) -> Option { let command: &BStr = self.binary_to_text_command.as_ref()?.as_ref(); let cmd = gix_command::prepare(gix_path::from_bstr(command).into_owned()) + // TODO: Add support for an actual Context, validate it *can* match Git + .with_context(Default::default()) .with_shell() .stdin(Stdio::null()) .stdout(Stdio::piped()) diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 6a550bc2dc..4c540cce85 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -184,7 +184,7 @@ pub mod prepare_diff { use crate::blob::platform::Resource; - /// The kind of operation that was performed during the [`diff`](super::Platform::prepare_diff()) operation. + /// The kind of operation that should be performed based on the configuration of the resources involved in the diff. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Operation<'a> { /// The [internal diff algorithm](imara_diff::diff) should be called with the provided arguments. 
From 865282f5f2a6e4613b2a93dd1b41af2b0b2e7757 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 12 Sep 2024 11:11:39 +0200 Subject: [PATCH 4/6] use new `WorktreeRoot` API provided by `gix-diff` --- gix/src/repository/diff.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gix/src/repository/diff.rs b/gix/src/repository/diff.rs index e2efb11ec1..4f98ebe52f 100644 --- a/gix/src/repository/diff.rs +++ b/gix/src/repository/diff.rs @@ -38,10 +38,10 @@ impl Repository { mode, self.attributes_only( &index, - if worktree_roots.new_root.is_some() || worktree_roots.old_root.is_some() { - gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping - } else { + if worktree_roots.is_unset() { gix_worktree::stack::state::attributes::Source::IdMapping + } else { + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping }, )? .inner, From b96d11fbd299fdc76ebc9a07fc75fd41721c38b3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 30 Aug 2024 09:57:16 +0200 Subject: [PATCH 5/6] Sketch the entire API surface to capture all parts of blob-merges --- Cargo.lock | 16 + gix-diff/src/blob/platform.rs | 1 + gix-merge/Cargo.toml | 26 ++ gix-merge/src/blob/builtin_driver.rs | 156 ++++++++++ gix-merge/src/blob/mod.rs | 154 +++++++++ gix-merge/src/blob/pipeline.rs | 436 ++++++++++++++++++++++++++ gix-merge/src/blob/platform.rs | 447 +++++++++++++++++++++++++++ gix-merge/src/lib.rs | 4 + 8 files changed, 1240 insertions(+) create mode 100644 gix-merge/src/blob/builtin_driver.rs create mode 100644 gix-merge/src/blob/mod.rs create mode 100644 gix-merge/src/blob/pipeline.rs create mode 100644 gix-merge/src/blob/platform.rs diff --git a/Cargo.lock b/Cargo.lock index fbc4bc1a3a..47d8945ec5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2043,6 +2043,22 @@ dependencies = [ [[package]] name = "gix-merge" version = "0.0.0" +dependencies = [ + "bstr", + "document-features", + "gix-command", + "gix-filter", + "gix-fs 0.11.3", + "gix-hash 0.14.2", 
+ "gix-object 0.44.0", + "gix-path 0.10.11", + "gix-tempfile 14.0.2", + "gix-trace 0.1.10", + "gix-worktree 0.36.0", + "imara-diff", + "serde", + "thiserror", +] [[package]] name = "gix-negotiate" diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 4c540cce85..495d23bd43 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -383,6 +383,7 @@ impl Platform { /// /// If one of the resources is binary, the operation reports an error as such resources don't make their data available /// which is required for the external diff to run. + // TODO: fix this - the diff shouldn't fail if binary (or large) files are used, just copy them into tempfiles. pub fn prepare_diff_command( &self, diff_command: BString, diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index 2114995cf0..b75d4cb384 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -14,5 +14,31 @@ workspace = true [lib] doctest = false +[features] +default = ["blob"] +## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. +blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace"] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
+serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] + [dependencies] +gix-hash = { version = "^0.14.2", path = "../gix-hash" } +gix-object = { version = "^0.44.0", path = "../gix-object" } +gix-filter = { version = "^0.13.0", path = "../gix-filter", optional = true } +gix-worktree = { version = "^0.36.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true } +gix-command = { version = "^0.3.9", path = "../gix-command", optional = true } +gix-path = { version = "^0.10.11", path = "../gix-path", optional = true } +gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true } +gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true } +gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true } + +thiserror = "1.0.63" +imara-diff = { version = "0.1.7", optional = true } +bstr = { version = "1.5.0", default-features = false } +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } + +document-features = { version = "0.2.0", optional = true } +[package.metadata.docs.rs] +all-features = true +features = ["document-features"] diff --git a/gix-merge/src/blob/builtin_driver.rs b/gix-merge/src/blob/builtin_driver.rs new file mode 100644 index 0000000000..cacef327ac --- /dev/null +++ b/gix-merge/src/blob/builtin_driver.rs @@ -0,0 +1,156 @@ +use crate::blob::BuiltinDriver; + +impl BuiltinDriver { + /// Return the name of this instance. + pub fn as_str(&self) -> &str { + match self { + BuiltinDriver::Text => "text", + BuiltinDriver::Binary => "binary", + BuiltinDriver::Union => "union", + } + } + + /// Get all available built-in drivers. + pub fn all() -> &'static [Self] { + &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union] + } + + /// Try to match one of our variants to `name`, case-sensitive, and return its instance. 
+ pub fn by_name(name: &str) -> Option { + Self::all().iter().find(|variant| variant.as_str() == name).copied() + } +} + +/// +pub mod binary { + use crate::blob::Resolution; + + /// What to do when having to pick a side to resolve a conflict. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ResolveWith { + /// Chose the ancestor to resolve a conflict. + Ancestor, + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, + } + + /// Tell the caller of [`merge()`] which side was picked + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, + } + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. + pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(ResolveWith::Ours) => (Pick::Ours, Resolution::Complete), + Some(ResolveWith::Theirs) => (Pick::Theirs, Resolution::Complete), + Some(ResolveWith::Ancestor) => (Pick::Ancestor, Resolution::Complete), + } + } +} + +/// +pub mod text { + use crate::blob::Resolution; + + /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express + /// merge conflicts in the resulting file. + #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ConflictStyle { + /// Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, + /// hiding the base version entirely. 
+ /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + ///``` + #[default] + Merge, + /// Show non-minimized hunks of local changes, the base, and the incoming (other) changes. + /// + /// This mode does not hide any information. + /// ``` + /// <<<<<<< local + /// line1-changed-by-both + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line1-changed-by-both + /// line2-changed + /// >>>>>>> incoming + ///``` + Diff3, + /// Like [`Diff3](Self::Diff3), but will show *minimized* hunks of local change and the incoming (other) changes, + /// as well as non-minimized hunks of the base. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + /// ``` + ZealousDiff3, + } + + /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Options { + /// How to visualize conflicts in merged files. + pub conflict_style: ConflictStyle, + /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` + pub marker_size: usize, + /// Decide what to do to automatically resolve conflicts. + /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. + pub on_conflict: Option, + } + + impl Default for Options { + fn default() -> Self { + Options { + conflict_style: Default::default(), + marker_size: 7, + on_conflict: None, + } + } + } + + /// What to do to resolve a conflict. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ResolveWith { + /// Chose our side to resolve a conflict. 
+ Ours, + /// Chose their side to resolve a conflict. + Theirs, + /// Place our and their lines one after another, in any order + Union, + } + + /// Merge `current` and `other` with `ancestor` as base according to `opts`. + /// + /// Place the merged result in `out` and return the resolution. + pub fn merge(_out: &mut Vec, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution { + todo!("text merge"); + } +} diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs new file mode 100644 index 0000000000..f14a517d5e --- /dev/null +++ b/gix-merge/src/blob/mod.rs @@ -0,0 +1,154 @@ +// TODO: remove this - only needed while &mut Vec isn't used. +#![allow(clippy::ptr_arg)] + +use bstr::BString; +use std::path::PathBuf; + +/// +pub mod builtin_driver; +/// +pub mod pipeline; +/// +pub mod platform; + +/// Identify a merge resolution. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Resolution { + /// Everything could be resolved during the merge. + Complete, + /// A conflict is still present. + Conflict, +} + +/// A way to classify a resource suitable for merging. +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub enum ResourceKind { + /// Our side of the state. + CurrentOrOurs, + /// Their side of the state. + OtherOrTheirs, + /// The state of the common base of both ours and theirs. + CommonAncestorOrBase, +} + +/// Define a driver program that merges +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum BuiltinDriver { + /// Perform a merge between text-sources such that conflicts are marked according to + /// `merge.conflictStyle` in the Git configuration. + /// + /// If any of the inputs, *base*, *ours* or *theirs* looks like non-text/binary, + /// the [`Binary`](Self::Binary) driver will be used instead. + /// + /// Also see [`builtin_driver::text::ConflictStyle`]. 
+ #[default] + Text, + /// Merge 'unmergable' content by choosing *ours* or *theirs*, without performing + /// an actual merge. + /// + /// Note that if the merge operation is for virtual ancestor (a merge for merge-bases), + /// then *ours* will always be chosen. + Binary, + /// Merge text-sources and resolve conflicts by adding conflicting lines one after another, + /// in random order, without adding conflict markers either. + /// + /// This can be useful for files that change a lot, but will remain usable merely by adding + /// all changed lines. + Union, +} + +/// Define a driver program that merges +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Driver { + /// The name of the driver, as referred to by `[merge "name"]` in the git configuration. + pub name: BString, + /// The human-readable version of `name`, only to be used for displaying driver-information to the user. + pub display_name: BString, + /// The command to execute to perform the merge entirely like ` %O %A %B %L %P %S %X %Y`. + /// + /// * **%O** + /// - the common ancestor version, or *base*. + /// * **%A** + /// - the current version, or *ours*. + /// * **%B** + /// - the other version, or *theirs*. + /// * **%L** + /// - The conflict-marker size as positive number. + /// * **%P** + /// - The path in which the merged result will be stored. + /// * **%S** + /// - The conflict-label for the common ancestor or *base*. + /// * **%X** + /// - The conflict-label for the current version or *ours*. + /// * **%Y** + /// - The conflict-label for the other version or *theirs*. + /// + /// Note that conflict-labels are behind the conflict markers, to annotate them. 
+ /// + /// A typical invocation with all arguments substituted could then look like this: + /// + /// ``` + /// .merge_file_nR2Qs1 .merge_file_WYXCJe .merge_file_UWbzrm 7 file e2a2970 HEAD feature + /// ``` + pub command: BString, + /// If `true`, this is the `name` of the driver to use when a virtual-merge-base is created, as a merge of all + /// available merge-bases if there are more than one. + /// + /// This value can also be special built-in drivers named `text`, `binary` or `union`. Note that user-defined + /// drivers with the same name will be preferred over built-in ones, but only for files whose git attributes + /// specified the driver by *name*. + pub recursive: Option, +} + +/// A conversion pipeline to take an object or path from what's stored in Git to what can be merged, while +/// following the guidance of git-attributes at the respective path to learn how the merge should be performed. +/// +/// Depending on the source, different conversions are performed: +/// +/// * `worktree on disk` -> `object for storage in git` +/// * `object` -> `possibly renormalized object` +/// - Renormalization means that the `object` is converted to what would be checked out into the work-tree, +/// just to turn it back into an object. +#[derive(Clone)] +pub struct Pipeline { + /// A way to read data directly from the worktree. + pub roots: pipeline::WorktreeRoots, + /// A pipeline to convert objects from the worktree to Git, and also from Git to the worktree, and back to Git. + pub filter: gix_filter::Pipeline, + /// Options affecting the way we read files. + pub options: pipeline::Options, + /// All available merge drivers. + /// + /// They are referenced in git-attributes by name, and we hand out indices into this array. + drivers: Vec, + /// Pre-configured attributes to obtain additional merge-related information. + attrs: gix_filter::attributes::search::Outcome, + /// A buffer to produce disk-accessible paths from worktree roots. 
+ path: PathBuf, +} + +/// A utility for gathering and processing all state necessary to perform a three-way merge. +/// +/// It can re-use buffers if all three parts of participating in the merge are +/// set repeatedly. +#[derive(Clone)] +pub struct Platform { + /// The current version (ours). + current: Option, + /// The ancestor version (base). + ancestor: Option, + /// The other version (theirs). + other: Option, + + /// A way to convert objects into a diff-able format. + pub filter: Pipeline, + /// A way to access `.gitattributes` + pub attr_stack: gix_worktree::Stack, + + /// The way we convert resources into mergeable states. + filter_mode: pipeline::Mode, +} diff --git a/gix-merge/src/blob/pipeline.rs b/gix-merge/src/blob/pipeline.rs new file mode 100644 index 0000000000..90adb61505 --- /dev/null +++ b/gix-merge/src/blob/pipeline.rs @@ -0,0 +1,436 @@ +use super::{BuiltinDriver, Pipeline, ResourceKind}; +use bstr::{BStr, ByteSlice}; +use gix_filter::attributes; +use gix_filter::driver::apply::{Delay, MaybeDelayed}; +use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome}; +use gix_object::tree::EntryKind; +use std::io::Read; +use std::path::{Path, PathBuf}; + +/// Options for use in a [`Pipeline`]. +#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// The amount of bytes that an object has to reach before being treated as binary. + /// These objects will not be queried, nor will their data be processed in any way. + /// If `0`, no file is ever considered binary due to their size. + /// + /// Note that for files stored in `git`, what counts is their stored, decompressed size, + /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets + /// them. + /// However, if they are to be retrieved from the worktree, the worktree size is what matters, + /// even though that also might be a `git-lfs` file which is small in Git. 
+ pub large_file_threshold_bytes: u64, + /// Capabilities of the file system which affect how we read worktree files. + pub fs: gix_fs::Capabilities, + /// Define which driver to use if the `merge` attribute for a resource is unspecified. + /// + /// This is the value of the `merge.default` git configuration. + pub default_driver: Option, +} + +/// The specific way to convert a resource. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Mode { + /// Prepare resources as they are stored in `git`. + /// + /// This is naturally the case when object-ids are used, but a conversion is needed + /// when data is read from a worktree. + #[default] + ToGit, + /// For sources that are object-ids, convert them to what *would* be stored in the worktree, + /// and back to what *would* be stored in Git. + /// + /// Sources that are located in a worktree are merely converted to what *would* be stored in Git. + /// + /// This is useful to prevent merge conflicts due to inconcistent whitespace. + Renormalize, +} + +/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree. +#[derive(Clone, Debug, Default)] +pub struct WorktreeRoots { + /// The worktree root where the current (or our) version of the resource is present. + pub current_root: Option, + /// The worktree root where the other (or their) version of the resource is present. + pub other_root: Option, + /// The worktree root where containing the resource of the common ancestor of our and their version. 
+ pub common_ancestor_root: Option, +} + +impl WorktreeRoots { + /// Return the root path for the given `kind` + pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { + match kind { + ResourceKind::CurrentOrOurs => self.current_root.as_deref(), + ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(), + ResourceKind::OtherOrTheirs => self.other_root.as_deref(), + } + } + + /// Return `true` if all worktree roots are unset. + pub fn is_unset(&self) -> bool { + self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none() + } +} + +/// Lifecycle +impl Pipeline { + /// Create a new instance of a pipeline which produces blobs suitable for merging. + /// + /// `roots` allow to read worktree files directly, and `worktree_filter` is used + /// to transform object database data directly. `drivers` further configure individual paths. + /// `options` are used to further configure the way we act.. + pub fn new( + roots: WorktreeRoots, + worktree_filter: gix_filter::Pipeline, + mut drivers: Vec, + options: Options, + ) -> Self { + drivers.sort_by(|a, b| a.name.cmp(&b.name)); + Pipeline { + roots, + filter: worktree_filter, + drivers, + options, + attrs: { + let mut out = gix_filter::attributes::search::Outcome::default(); + out.initialize_with_selection(&Default::default(), Some("merge")); + out + }, + path: Default::default(), + } + } +} + +/// Access +impl Pipeline { + /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](super::Driver::name) to support binary searches. + pub fn drivers(&self) -> &[super::Driver] { + &self.drivers + } +} + +/// Data as part of an [Outcome]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub enum Data { + /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`]. 
+ Buffer, + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be merged. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in git, and once it's the size of file + /// in the worktree - both can differ a lot depending on filters. + size: u64, + }, +} + +/// The selection of the driver to use by a resource obtained with [`Pipeline::convert_to_mergeable()`]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] +pub enum DriverChoice { + /// Use the given built-in driver to perform the merge. + BuiltIn(BuiltinDriver), + /// Use the user-provided driver program using the index into [the pipelines driver array](Pipeline::drivers(). + Index(usize), +} + +impl Default for DriverChoice { + fn default() -> Self { + DriverChoice::BuiltIn(Default::default()) + } +} + +/// The outcome returned by [Pipeline::convert_to_mergeable()]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub struct Outcome { + /// If available, an index into the `drivers` field to access more diff-related information of the driver for items + /// at the given path, as previously determined by git-attributes. + /// + /// * `merge` is set + /// - Use the [`BuiltinDriver::Text`] + /// * `-merge` is unset + /// - Use the [`BuiltinDriver::Binary`] + /// * `!merge` is unspecified + /// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. + /// * `merge=name` + /// - Search for a user-configured or built-in driver called `name`. + /// - If not found, silently default to [`BuiltinDriver::Text`] + /// + /// Note that drivers are queried even if there is no object available. 
+ pub driver: DriverChoice, + /// The data itself, suitable for diffing, and if the object or worktree item is present at all. + /// Otherwise, it's `None`. + pub data: Option, +} + +/// +pub mod convert_to_mergeable { + use std::collections::TryReserveError; + + use bstr::BString; + use gix_object::tree::EntryKind; + + /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")] + InvalidEntryKind { rela_path: BString, actual: EntryKind }, + #[error("Entry at '{rela_path}' could not be read as symbolic link")] + ReadLink { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be opened for reading or read from")] + OpenOrRead { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")] + StreamCopy { rela_path: BString, source: std::io::Error }, + #[error(transparent)] + FindObject(#[from] gix_object::find::existing_object::Error), + #[error(transparent)] + ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error), + #[error(transparent)] + ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error), + #[error("Memory allocation failed")] + OutOfMemory(#[from] TryReserveError), + } +} + +/// Conversion +impl Pipeline { + /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`. + /// The resulting merge-able data is written into `out`, if it's not too large or considered binary. + /// The returned [`Outcome`] contains information on how to use `out`, or if it's filled at all. + /// + /// `attributes` must be returning the attributes at `rela_path`, and `objects` must be usable if `kind` is + /// a resource in the object database, i.e. 
if no worktree root is available. It's notable that if a worktree root + /// is present for `kind`, then a `rela_path` is used to access it on disk. + /// + /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case + /// [a root](WorktreeRoots) is present, then `out` will be left cleared and [Outcome::data] will be `None`. + /// This is useful to simplify the calling code as empty buffers signal that nothing is there. + /// + /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode. + /// Only blobs are allowed. + /// + /// Use `convert` to control what kind of the resource will be produced. + #[allow(clippy::too_many_arguments)] + pub fn convert_to_mergeable( + &mut self, + id: &gix_hash::oid, + mode: EntryKind, + rela_path: &BStr, + kind: ResourceKind, + attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome), + objects: &dyn gix_object::FindObjectOrHeader, + convert: Mode, + out: &mut Vec, + ) -> Result { + if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) { + return Err(convert_to_mergeable::Error::InvalidEntryKind { + rela_path: rela_path.to_owned(), + actual: mode, + }); + } + + out.clear(); + attributes(rela_path, &mut self.attrs); + let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); + let driver = match attr.assignment.state { + attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), + attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), + attributes::StateRef::Value(name) => { + let name = name.as_bstr(); + self.drivers + .binary_search_by(|d| d.name.as_bstr().cmp(name)) + .ok() + .map(DriverChoice::Index) + .or_else(|| { + name.to_str() + .ok() + .and_then(BuiltinDriver::by_name) + .map(DriverChoice::BuiltIn) + }) + .unwrap_or_default() + } + attributes::StateRef::Unspecified => self + .options + .default_driver + 
.map(DriverChoice::BuiltIn) + .unwrap_or_default(), + }; + match self.roots.by_kind(kind) { + Some(root) => { + self.path.clear(); + self.path.push(root); + self.path.push(gix_path::from_bstr(rela_path)); + let size_in_bytes = (self.options.large_file_threshold_bytes > 0) + .then(|| { + none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + }) + }) + .transpose()?; + let data = match size_in_bytes { + Some(None) => None, // missing as identified by the size check + Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::Binary { size }), + _ => { + let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + + if let Some(file) = file { + match convert { + Mode::ToGit | Mode::Renormalize => { + let res = self.filter.convert_to_git( + file, + gix_path::from_bstr(rela_path).as_ref(), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(mut file) => { + file.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Process(mut stream) => { + stream.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + } + } + + Some(if is_binary_buf(out) { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + }) + } else { + None + } + } + }; + Ok(Outcome { driver, data }) + } + None => { + let data = if id.is_null() { + None + } else { + let header = objects + .try_header(id) + 
.map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + let is_binary = self.options.large_file_threshold_bytes > 0 + && header.size > self.options.large_file_threshold_bytes; + let data = if is_binary { + Data::Binary { size: header.size } + } else { + objects + .try_find(id, out) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + + if convert == Mode::Renormalize { + let res = self + .filter + .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + + match res { + ToWorktreeOutcome::Unchanged(_) => {} + ToWorktreeOutcome::Buffer(src) => { + out.clear(); + out.try_reserve(src.len())?; + out.extend_from_slice(src); + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, out).map_err(|err| { + convert_to_mergeable::Error::StreamCopy { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + }; + } + + let res = self.filter.convert_to_git( + &**out, + &gix_path::from_bstr(rela_path), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(_) => {} + ToGitOutcome::Process(mut stream) => { + stream + .read_to_end(out) + .map_err(|err| convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + + if is_binary_buf(out) { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + } + }; + Some(data) + }; + Ok(Outcome { driver, data }) + } + } + } +} + +fn none_if_missing(res: std::io::Result) -> std::io::Result> { + match res { + Ok(data) 
=> Ok(Some(data)),
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
+ Err(err) => Err(err),
+ }
+}
+
+fn is_binary_buf(buf: &[u8]) -> bool {
+ let buf = &buf[..buf.len().min(8000)];
+ buf.contains(&0)
+}
diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs
new file mode 100644
index 0000000000..497b9bf887
--- /dev/null
+++ b/gix-merge/src/blob/platform.rs
@@ -0,0 +1,447 @@
+use bstr::{BStr, BString};
+
+use crate::blob::pipeline::DriverChoice;
+use crate::blob::{pipeline, Pipeline, Platform, ResourceKind};
+
+/// A stored value representing a resource that participates in a merge.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
+pub(super) struct Resource {
+ /// The `id` of the value, or `null` if it's only living in a worktree.
+ id: gix_hash::ObjectId,
+ /// The repository-relative path where the resource lives in the tree.
+ rela_path: BString,
+ /// The outcome of converting a resource into a mergeable format using [Pipeline::convert_to_mergeable()].
+ conversion: pipeline::Outcome,
+ /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`.
+ mode: gix_object::tree::EntryKind,
+ /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary
+ /// or the resource doesn't exist.
+ buffer: Vec<u8>,
+}
+
+/// A blob or executable ready to be merged in one way or another.
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct ResourceRef<'a> {
+ /// The data itself, suitable for merging, and if the object or worktree item is present at all.
+ pub data: resource::Data<'a>,
+ /// The location of the resource, relative to the working tree.
+ pub rela_path: &'a BStr,
+ /// Which driver to use according to the resource's configuration.
+ pub driver_choice: DriverChoice, + /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at + /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which + /// after its 'to-git' conversion never had its hash computed. + pub id: &'a gix_hash::oid, +} + +/// +pub mod resource { + use crate::blob::{ + pipeline, + platform::{Resource, ResourceRef}, + }; + + impl<'a> ResourceRef<'a> { + pub(super) fn new(cache: &'a Resource) -> Self { + ResourceRef { + data: cache.conversion.data.map_or(Data::Missing, |data| match data { + pipeline::Data::Buffer => Data::Buffer(&cache.buffer), + pipeline::Data::Binary { size } => Data::Binary { size }, + }), + driver_choice: cache.conversion.driver, + rela_path: cache.rela_path.as_ref(), + id: &cache.id, + } + } + } + + /// The data of a mergeable resource, as it could be determined and computed previously. + #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] + pub enum Data<'a> { + /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. + Missing, + /// The textual data as processed and ready for merging, i.e. suitable for storage in Git. + Buffer(&'a [u8]), + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be merged. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in Git, and once it's the size of file + /// in the worktree. + size: u64, + }, + } + + impl<'a> Data<'a> { + /// Return ourselves as slice of bytes if this instance stores data. 
+ pub fn as_slice(&self) -> Option<&'a [u8]> { + match self { + Data::Buffer(d) => Some(d), + Data::Binary { .. } | Data::Missing => None, + } + } + } +} + +/// +pub mod set_resource { + use bstr::BString; + + use crate::blob::{pipeline, ResourceKind}; + + /// The error returned by [Platform::set_resource](super::Platform::set_resource). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Can only diff blobs, not {mode:?}")] + InvalidMode { mode: gix_object::tree::EntryKind }, + #[error("Failed to read {kind:?} worktree data from '{rela_path}'")] + Io { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error(transparent)] + ConvertToMergeable(#[from] pipeline::convert_to_mergeable::Error), + } +} + +/// +pub mod merge { + use crate::blob::pipeline::DriverChoice; + use crate::blob::platform::ResourceRef; + use crate::blob::{builtin_driver, BuiltinDriver, Driver, Resolution}; + use bstr::BString; + + /// The product of a [`prepare_merge()`](crate::blob::Platform::prepare_merge_state()) call to finally + /// perform the merge and retrieve the merge results. + #[derive(Copy, Clone)] + pub struct State<'parent> { + /// The platform that hosts the resources, used to access drivers. + pub(super) parent: &'parent super::Platform, + /// The current or our side of the merge operation. + pub current: ResourceRef<'parent>, + /// The ancestor or base of the merge operation. + pub ancestor: ResourceRef<'parent>, + /// The other or their side of the merge operation. + pub other: ResourceRef<'parent>, + } + + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Options { + /// If `true`, the resources being merged are contained in a virtual ancestor, + /// which is the case when merge bases are merged into one. 
+ pub is_virtual_ancestor: bool,
+ /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible and it picks a side.
+ pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
+ /// Options for the builtin [text driver](BuiltinDriver::Text).
+ pub text: builtin_driver::text::Options,
+ }
+
+ ///
+ pub mod prepare_external_driver {
+ use std::ops::{Deref, DerefMut};
+
+ use crate::blob::ResourceKind;
+ use bstr::BString;
+
+ /// The error returned by [State::prepare_external_driver()](super::State::prepare_external_driver()).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")]
+ SourceOrDestinationAreBinary,
+ #[error(
+ "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
+ )]
+ CreateTempfile {
+ rela_path: BString,
+ kind: ResourceKind,
+ source: std::io::Error,
+ },
+ #[error(
+ "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
+ )]
+ WriteTempfile {
+ rela_path: BString,
+ kind: ResourceKind,
+ source: std::io::Error,
+ },
+ }
+
+ /// The product of a [`prepare_external_driver`](super::State::prepare_external_driver()) operation.
+ ///
+ /// This type acts like [`std::process::Command`], ready to run, with `stderr` set to *inherit*,
+ /// but `stdin` closed and `stdout` set up to be captured.
+ // TODO: remove this
+ #[allow(dead_code)]
+ pub struct Command {
+ /// The pre-configured command
+ cmd: std::process::Command,
+ /// A tempfile holding the *current* (ours) state of the resource.
+ current: gix_tempfile::Handle,
+ /// A tempfile holding the *ancestor* (base) state of the resource.
+ ancestor: gix_tempfile::Handle,
+ /// A tempfile holding the *other* (their) state of the resource.
+ other: gix_tempfile::Handle, + } + + impl Deref for Command { + type Target = std::process::Command; + + fn deref(&self) -> &Self::Target { + &self.cmd + } + } + + impl DerefMut for Command { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.cmd + } + } + } + + /// + pub mod builtin_merge { + /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](super::State::builtin_merge). + pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, + /// New data was produced with the result of the merge, to be found in the buffer that was passed to + /// [builtin_merge()](super::State::builtin_merge). + Buffer, + } + } + + /// The error returned by [State::merge()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + PrepareExternalDriver(#[from] prepare_external_driver::Error), + } + + /// Plumbing + impl<'parent> State<'parent> { + /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources, + /// prepare the invocation and temporary files needed to launch it according to protocol. + /// + /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge. + /// + /// ### Deviation + /// + /// We allow passing more context than Git would by taking a whole `context`, it's up to the caller to decide how much is filled. + pub fn prepare_external_driver( + &self, + _merge_command: BString, + _context: gix_command::Context, + ) -> Result { + todo!("prepare command") + } + + /// Perform the merge according to our resources and + /// Note that if the *pick* wasn't [`Buffer`](builtin_merge::Pick::Buffer), then `out` will not have been cleared. 
+ pub fn builtin_merge( + &self, + _out: &mut Vec, + _driver: BuiltinDriver, + _opts: Options, + ) -> (builtin_merge::Pick, Resolution) { + todo!("do full merge") + } + + /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err` + /// with the built-in driver to use instead. + pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> { + match self.current.driver_choice { + DriverChoice::BuiltIn(builtin) => Err(builtin), + DriverChoice::Index(idx) => self.parent.filter.drivers.get(idx).ok_or(BuiltinDriver::default()), + } + } + } + + /// Convenience + impl<'parent> State<'parent> { + /// Perform the merge, possibly invoking an external merge command, and store the result in `out`. + /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`. + pub fn merge( + &self, + _out: &mut Vec, + _opts: Options, + _context: gix_command::Context, + ) -> Result { + match self.configured_driver() { + Ok(driver) => { + let _cmd = self.prepare_external_driver(driver.command.clone(), _context)?; + todo!("invoke command and copy result") + } + Err(_builtin) => { + todo!("call builtins and copy results") + } + } + } + } +} + +/// +pub mod prepare_merge { + /// The error returned by [Platform::prepare_merge()](super::Platform::prepare_merge_state()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The 'current', 'ancestor' or 'other' resource for the merge operation were not set")] + UnsetResource, + #[error("Tried to merge 'current' and 'other' where at least one of them is removed")] + CurrentOrOtherRemoved, + } +} + +/// Lifecycle +impl Platform { + /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able. + /// `filter_mode` decides how to do that specifically. + /// Use `attr_stack` to access attributes pertaining worktree filters and merge settings. 
+ pub fn new(filter: Pipeline, filter_mode: pipeline::Mode, attr_stack: gix_worktree::Stack) -> Self { + Platform { + current: None, + ancestor: None, + other: None, + filter, + filter_mode, + attr_stack, + } + } +} + +/// Preparation +impl Platform { + /// Store enough information about a resource to eventually use it in a merge, where… + /// + /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either + /// be a resource in the worktree, or it's considered a non-existing, deleted object. + /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided + /// for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root + /// is available for `kind`. + /// * `mode` is the kind of object (only blobs and links are allowed) + /// * `rela_path` is the relative path as seen from the (work)tree root. + /// * `kind` identifies the side of the merge this resource will be used for. + /// * `objects` provides access to the object database in case the resource can't be read from a worktree. + pub fn set_resource( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + self.set_resource_inner(id, mode, rela_path, kind, objects) + } + + /// Returns the resource of the given kind if it was set. + pub fn resource(&self, kind: ResourceKind) -> Option> { + let cache = match kind { + ResourceKind::CurrentOrOurs => self.current.as_ref(), + ResourceKind::CommonAncestorOrBase => self.ancestor.as_ref(), + ResourceKind::OtherOrTheirs => self.other.as_ref(), + }?; + ResourceRef::new(cache).into() + } + + /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources. 
+ pub fn prepare_merge_state(&self) -> Result, prepare_merge::Error> { + let current = self.current.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let ancestor = self.ancestor.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let other = self.other.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + + let out = merge::State { + parent: self, + current: ResourceRef::new(current), + ancestor: ResourceRef::new(ancestor), + other: ResourceRef::new(other), + }; + + match (current.conversion.data, other.conversion.data) { + (None, None) => Err(prepare_merge::Error::CurrentOrOtherRemoved), + (_, _) => Ok(out), + } + } +} + +impl Platform { + fn set_resource_inner( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + if !matches!( + mode, + gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable + ) { + return Err(set_resource::Error::InvalidMode { mode }); + } + let entry = + self.attr_stack + .at_entry(rela_path, None, objects) + .map_err(|err| set_resource::Error::Attributes { + source: err, + kind, + rela_path: rela_path.to_owned(), + })?; + + let storage = match kind { + ResourceKind::OtherOrTheirs => &mut self.other, + ResourceKind::CommonAncestorOrBase => &mut self.ancestor, + ResourceKind::CurrentOrOurs => &mut self.current, + }; + + let mut buf_storage = Vec::new(); + let out = self.filter.convert_to_mergeable( + &id, + mode, + rela_path, + kind, + &mut |_, out| { + let _ = entry.matching_attributes(out); + }, + objects, + self.filter_mode, + storage.as_mut().map_or(&mut buf_storage, |s| &mut s.buffer), + )?; + + match storage { + None => { + *storage = Some(Resource { + id, + rela_path: rela_path.to_owned(), + conversion: out, + mode, + buffer: buf_storage, + }); + } + Some(storage) => { + storage.id = id; + storage.rela_path = rela_path.to_owned(); + 
storage.conversion = out; + storage.mode = mode; + } + }; + Ok(()) + } +} diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs index 3a6cd994a5..8e608c53ab 100644 --- a/gix-merge/src/lib.rs +++ b/gix-merge/src/lib.rs @@ -1,2 +1,6 @@ #![deny(rust_2018_idioms)] #![forbid(unsafe_code)] + +/// +#[cfg(feature = "blob")] +pub mod blob; From b09092c545f35555d806ce69d54fda7da9b9e9b8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 15 Sep 2024 09:32:19 +0200 Subject: [PATCH 6/6] Implement `text` and `binary` merge algorithms, also with baseline tests for correctness. --- Cargo.lock | 2 + gix-merge/Cargo.toml | 4 + gix-merge/src/blob/builtin_driver.rs | 667 +++++++++++++++++- gix-merge/src/blob/platform.rs | 2 +- .../generated-archives/text-baseline.tar | Bin 0 -> 84480 bytes gix-merge/tests/fixtures/text-baseline.sh | 207 ++++++ gix-merge/tests/merge/blob/builtin_driver.rs | 145 ++++ gix-merge/tests/merge/blob/mod.rs | 1 + gix-merge/tests/merge/main.rs | 4 + 9 files changed, 1011 insertions(+), 21 deletions(-) create mode 100644 gix-merge/tests/fixtures/generated-archives/text-baseline.tar create mode 100644 gix-merge/tests/fixtures/text-baseline.sh create mode 100644 gix-merge/tests/merge/blob/builtin_driver.rs create mode 100644 gix-merge/tests/merge/blob/mod.rs create mode 100644 gix-merge/tests/merge/main.rs diff --git a/Cargo.lock b/Cargo.lock index 47d8945ec5..5912a30127 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2053,9 +2053,11 @@ dependencies = [ "gix-object 0.44.0", "gix-path 0.10.11", "gix-tempfile 14.0.2", + "gix-testtools", "gix-trace 0.1.10", "gix-worktree 0.36.0", "imara-diff", + "pretty_assertions", "serde", "thiserror", ] diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index b75d4cb384..6d8da01014 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -39,6 +39,10 @@ serde = { version = "1.0.114", optional = true, default-features = false, featur document-features = { version = "0.2.0", optional = true } +[dev-dependencies] 
+gix-testtools = { path = "../tests/tools" } +pretty_assertions = "1.4.0" + [package.metadata.docs.rs] all-features = true features = ["document-features"] diff --git a/gix-merge/src/blob/builtin_driver.rs b/gix-merge/src/blob/builtin_driver.rs index cacef327ac..36bf78395a 100644 --- a/gix-merge/src/blob/builtin_driver.rs +++ b/gix-merge/src/blob/builtin_driver.rs @@ -23,8 +23,6 @@ impl BuiltinDriver { /// pub mod binary { - use crate::blob::Resolution; - /// What to do when having to pick a side to resolve a conflict. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum ResolveWith { @@ -36,7 +34,7 @@ pub mod binary { Theirs, } - /// Tell the caller of [`merge()`] which side was picked + /// Tell the caller of [`merge()`](function::merge) which side was picked. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum Pick { /// Chose the ancestor. @@ -47,23 +45,32 @@ pub mod binary { Theirs, } - /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. - /// - /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. - pub fn merge(on_conflict: Option) -> (Pick, Resolution) { - match on_conflict { - None => (Pick::Ours, Resolution::Conflict), - Some(ResolveWith::Ours) => (Pick::Ours, Resolution::Complete), - Some(ResolveWith::Theirs) => (Pick::Theirs, Resolution::Complete), - Some(ResolveWith::Ancestor) => (Pick::Ancestor, Resolution::Complete), + pub(super) mod function { + use crate::blob::builtin_driver::binary::{Pick, ResolveWith}; + use crate::blob::Resolution; + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. 
+ pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(resolve) => ( + match resolve { + ResolveWith::Ours => Pick::Ours, + ResolveWith::Theirs => Pick::Theirs, + ResolveWith::Ancestor => Pick::Ancestor, + }, + Resolution::Complete, + ), + } } } } +pub use binary::function::merge as binary; /// pub mod text { - use crate::blob::Resolution; - /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express /// merge conflicts in the resulting file. #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] @@ -115,8 +122,11 @@ pub mod text { } /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Options { + /// Determine of the diff will be performed. + /// Defaults to [`imara_diff::Algorithm::Myers`]. + pub diff_algorithm: imara_diff::Algorithm, /// How to visualize conflicts in merged files. pub conflict_style: ConflictStyle, /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` @@ -132,10 +142,17 @@ pub mod text { conflict_style: Default::default(), marker_size: 7, on_conflict: None, + diff_algorithm: imara_diff::Algorithm::Myers, } } } + impl Options { + fn resolves_with_theirs_or_ours(&self) -> bool { + matches!(self.on_conflict, Some(ResolveWith::Ours | ResolveWith::Theirs)) + } + } + /// What to do to resolve a conflict. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum ResolveWith { @@ -147,10 +164,620 @@ pub mod text { Union, } - /// Merge `current` and `other` with `ancestor` as base according to `opts`. - /// - /// Place the merged result in `out` and return the resolution. 
- pub fn merge(_out: &mut Vec, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution { - todo!("text merge"); + pub(super) mod function { + use crate::blob::builtin_driver::text::{ConflictStyle, Options, ResolveWith}; + use crate::blob::Resolution; + use bstr::{BStr, ByteSlice, ByteVec}; + use std::ops::Range; + + /// Merge `current` and `other` with `ancestor` as base according to `opts`. + /// Use `current_label`, `other_label` and `ancestor_label` to annotate conflict sections. + /// + /// `input` is for reusing memory of lists of tokens, and `other_tokens` is memory + /// for storing tokens for `other`. + /// Place the merged result in `out` (cleared before use) and return the resolution. + /// + /// # Important + /// + /// *The caller* is responsible for clearing it, otherwise tokens will accumulate. + /// This idea is to save time if the input is known to be very similar. + #[allow(clippy::too_many_arguments)] + pub fn merge<'a>( + out: &mut Vec, + input: &mut imara_diff::intern::InternedInput<&'a [u8]>, + current: &'a [u8], + current_label: Option<&BStr>, + ancestor: &'a [u8], + ancestor_label: Option<&BStr>, + other: &'a [u8], + other_label: Option<&BStr>, + opts: Options, + ) -> Resolution { + out.clear(); + input.update_before(tokens(ancestor)); + input.update_after(tokens(current)); + + let current_hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Current, + hunks: Default::default(), + }, + ); + + let current_tokens = std::mem::take(&mut input.after); + input.update_after(tokens(other)); + + let mut hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Other, + hunks: current_hunks, + }, + ); + + hunks.sort_by(|a, b| a.before.start.cmp(&b.before.start)); + let mut hunks = hunks.into_iter().filter(|h| match opts.on_conflict { + Some(ResolveWith::Ours) => h.side == Side::Current, + Some(ResolveWith::Theirs) => h.side == Side::Other, + _ => true, + }); + let 
mut intersecting = Vec::new(); + let mut ancestor_integrated_until = 0; + let mut resolution = Resolution::Complete; + // TODO(performance): instead of skipping hunks, let's not compute these ones at all, but only once all tests are there. + let resolves_with_theirs_or_ours = opts.resolves_with_theirs_or_ours(); + let mut filled_hunks = Vec::with_capacity(2); + while let Some(hunk) = hunks.next() { + if !resolves_with_theirs_or_ours && take_intersecting(&hunk, &mut hunks, &mut intersecting) { + fill_ancestor(&hunk.before, &mut intersecting); + + let filled_hunks_side = hunk.side; + filled_hunks.clear(); + filled_hunks.push(hunk); + fill_ancestor( + &intersecting + .first() + .zip(intersecting.last()) + .map(|(f, l)| f.before.start..l.before.end) + .expect("at least one entry"), + &mut filled_hunks, + ); + match opts.on_conflict { + None => { + let (hunks_front_and_back, num_hunks_front) = match opts.conflict_style { + ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => zealously_contract_hunks( + &mut filled_hunks, + &mut intersecting, + input, + ¤t_tokens, + ), + ConflictStyle::Diff3 => (Vec::new(), 0), + }; + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + // TODO: dedup preamble, simplify this - we know that our and their hunks aren't empty. 
+ let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + let last_hunk = back_hunks.last().or(their_hunks.last()).expect("at least one hunk"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + + write_hunks(front_hunks, input, ¤t_tokens, out); + if their_hunks.is_empty() { + // TODO: assure we run into this + write_hunks(our_hunks, input, ¤t_tokens, out); + } else if our_hunks.is_empty() { + // TODO: assure we run into this + write_hunks(their_hunks, input, ¤t_tokens, out); + } else { + resolution = Resolution::Conflict; + let our_nl = detect_line_ending(our_hunks, input, ¤t_tokens); + let their_nl = detect_line_ending(their_hunks, input, ¤t_tokens); + match opts.conflict_style { + ConflictStyle::Merge => { + write_conflict_marker(out, b'<', current_label, opts.marker_size, our_nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, opts.marker_size, their_nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, opts.marker_size, their_nl); + } + ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { + write_conflict_marker(out, b'<', current_label, opts.marker_size, our_nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + let ancestor_hunk = Hunk { + before: first_hunk.before.start..last_hunk.before.end, + after: Default::default(), + side: Side::Ancestor, + }; + let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); + let ancestor_nl = detect_line_ending(ancestor_hunk, input, ¤t_tokens); + write_conflict_marker(out, b'|', ancestor_label, opts.marker_size, ancestor_nl); + write_hunks(ancestor_hunk, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, opts.marker_size, their_nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, 
opts.marker_size, their_nl); + } + } + } + write_hunks(back_hunks, input, ¤t_tokens, out); + // TODO: have a sample that validates this! + ancestor_integrated_until = last_hunk.before.end; + } + Some(resolve) => { + match resolve { + ResolveWith::Ours | ResolveWith::Theirs => { + unreachable!("we should have chosen to integrate the hunks directly") + } + ResolveWith::Union => { + let (hunks_front_and_back, num_hunks_front) = zealously_contract_hunks( + &mut filled_hunks, + &mut intersecting, + input, + ¤t_tokens, + ); + + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + write_ancestor( + input, + ancestor_integrated_until, + first_hunk.before.start as usize, + out, + ); + write_hunks(front_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending(front_hunks, input, ¤t_tokens)); + write_hunks(our_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending(our_hunks, input, ¤t_tokens)); + write_hunks(their_hunks, input, ¤t_tokens, out); + if !back_hunks.is_empty() { + assure_ends_with_nl( + out, + detect_line_ending(their_hunks, input, ¤t_tokens), + ); + } + write_hunks(back_hunks, input, ¤t_tokens, out); + // TODO: have a sample that validates this! 
+ ancestor_integrated_until = back_hunks + .last() + .or(their_hunks.last()) + .expect("at least one hunk") + .before + .end; + } + }; + } + } + } else { + write_ancestor(input, ancestor_integrated_until, hunk.before.start as usize, out); + ancestor_integrated_until = hunk.before.end; + write_hunks(std::slice::from_ref(&hunk), input, ¤t_tokens, out); + } + } + write_ancestor(input, ancestor_integrated_until, input.after.len(), out); + + resolution + } + + // TODO: find actual line ending based on hunks + fn detect_line_ending( + _hunks: &[Hunk], + _input: &mut imara_diff::intern::InternedInput<&[u8]>, + _current_tokens: &[imara_diff::intern::Token], + ) -> &'static BStr { + b"\n".into() + } + + fn assure_ends_with_nl(out: &mut Vec, nl: &BStr) { + if !out.is_empty() && !out.ends_with(b"\n") { + out.push_str(nl); + } + } + + fn write_conflict_marker(out: &mut Vec, marker: u8, label: Option<&BStr>, marker_size: usize, nl: &BStr) { + assure_ends_with_nl(out, nl); + out.extend(std::iter::repeat(marker).take(marker_size)); + if let Some(label) = label { + out.push(b' '); + out.extend_from_slice(label); + } + out.push_str(nl); + } + + fn write_ancestor(input: &imara_diff::intern::InternedInput<&[u8]>, from: u32, to: usize, out: &mut Vec) { + if to < from as usize { + return; + } + if let Some(tokens) = input.before.get(from as usize..to) { + write_tokens(&input.interner, tokens, out); + } + } + + /// Look at all hunks in `in_out` and fill in the ancestor in the range of `ancestor_range`. + /// This is all based on knowing the ranges are sequences of tokens. 
+ fn fill_ancestor(Range { start, end }: &Range, in_out: &mut Vec) { + if in_out.is_empty() { + return; + } + + fn ancestor_hunk(start: u32, num_lines: u32) -> Hunk { + let range = start..start + num_lines; + Hunk { + before: range.clone(), + after: range, + side: Side::Ancestor, + } + } + + fn is_nonzero(num: &u32) -> bool { + *num > 0 + } + + let first = &in_out[0]; + let mut first_idx = 0; + if let Some(lines_to_add) = first.before.start.checked_sub(*start).filter(is_nonzero) { + in_out.insert(0, ancestor_hunk(*start, lines_to_add)); + first_idx += 1; + } + + let mut added_hunks = false; + for (idx, next_idx) in (first_idx..in_out.len()).map(|idx| (idx, idx + 1)) { + let Some(next_hunk) = in_out.get(next_idx) else { break }; + let hunk = &in_out[idx]; + if let Some(lines_to_add) = next_hunk.after.start.checked_sub(hunk.after.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(hunk.after.end, lines_to_add)); + added_hunks = true; + } + } + let in_out_len = in_out.len(); + if added_hunks { + in_out[first_idx..in_out_len].sort_by_key(|hunk| hunk.before.start); + } + + let last = &in_out[in_out_len - 1]; + if let Some(lines_to_add) = end.checked_sub(last.before.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(last.before.end, lines_to_add)); + } + } + + /// Reduce the area of `a_hunks` and the hunks in `b_hunks` so that only those lines that are + /// actually different remain. Note that we have to compare the resolved values, not only the tokens, + /// so `current_tokens` is expected to be known to the `input` (and its `interner`). + /// Hunks from all input arrays maybe removed in the process from the front and back, in case they + /// are entirely equal to what's in `hunk`. Note also that `a_hunks` and `b_hunks` are treated to be consecutive, + /// so [`fill_ancestor()`] must have been called beforehand, and are assumed to covert the same space in the + /// ancestor buffer. + /// Use `mode` to determine how hunks may be handled. 
+ /// + /// Return a new vector of all the hunks that were removed from front and back, with partial hunks inserted, + /// along with the amount of hunks that go front, with the remaining going towards the back. + // TODO: refactor so hunks and their associated data can go into an array for easier handling. + #[must_use] + fn zealously_contract_hunks( + a_hunks: &mut Vec, + b_hunks: &mut Vec, + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + ) -> (Vec, usize) { + let line_content = |token_idx: u32, side: Side| { + let tokens = match side { + Side::Current => current_tokens, + Side::Other => &input.after, + Side::Ancestor => &input.before, + }; + &input.interner[tokens[token_idx as usize]] + }; + fn range_by_side(hunk: &mut Hunk) -> &mut Range { + match hunk.side { + Side::Current | Side::Other => &mut hunk.after, + Side::Ancestor => &mut hunk.before, + } + } + fn truncate_hunks_from_from_front( + hunks: &mut Vec, + hunks_to_remove_until_idx: Option, + hunk_token_equal_till: Option, + mut out_hunks: Option<&mut Vec>, + ) { + let Some(hunks_to_remove_until_idx) = hunks_to_remove_until_idx else { + assert!(hunk_token_equal_till.is_none()); + return; + }; + let mut last_index_to_remove = Some(hunks_to_remove_until_idx); + let hunk = &mut hunks[hunks_to_remove_until_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_till) = hunk_token_equal_till { + let orig_start = range.start; + let new_start = hunk_token_equal_till + 1; + range.start = new_start; + if Range::::is_empty(range) { + range.start = orig_start; + } else if let Some(out) = out_hunks.as_deref_mut() { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = orig_start; + new_range.end = new_start; + + out.push(removed_hunk); + } else { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + } + } + if 
let Some(last_index_to_remove) = last_index_to_remove { + let mut current_idx = 0; + hunks.retain(|hunk| { + if current_idx > last_index_to_remove { + true + } else { + current_idx += 1; + if let Some(out) = out_hunks.as_deref_mut() { + out.push(hunk.clone()); + } + false + } + }); + } + } + + fn truncate_hunks_from_from_back( + hunks: &mut Vec, + remove_trailing_hunks_from_idx: Option, + hunk_token_equal_from: Option, + mut out_hunks: Option<&mut Vec>, + ) { + let Some(mut remove_trailing_hunks_from_idx) = remove_trailing_hunks_from_idx else { + assert!(hunk_token_equal_from.is_none()); + return; + }; + + let hunk = &mut hunks[remove_trailing_hunks_from_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_from) = hunk_token_equal_from { + let orig_end = range.end; + let new_end = hunk_token_equal_from; + range.end = new_end; + if Range::::is_empty(range) { + range.end = orig_end; + } else if let Some(out) = out_hunks.as_deref_mut() { + remove_trailing_hunks_from_idx += 1; + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = new_end; + new_range.end = orig_end; + + out.push(removed_hunk); + } else { + remove_trailing_hunks_from_idx += 1; + } + } + if let Some(out) = out_hunks { + out.extend_from_slice(&hunks[remove_trailing_hunks_from_idx..]); + } + hunks.truncate(remove_trailing_hunks_from_idx); + } + + let (mut last_a_hunk_idx, mut last_b_hunk_idx) = (0, 0); + let (mut out, hunks_in_front) = { + let (mut remove_leading_a_hunks_from, mut remove_leading_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_till, mut b_hunk_token_equal_till) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks(a_hunks).zip(iterate_hunks(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + 
a_hunk_token_equal_till = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_till = None; + last_b_hunk_idx = b_hunk_idx; + } + if a_line == b_line { + (remove_leading_a_hunks_from, remove_leading_b_hunks_from) = + (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_till, b_hunk_token_equal_till) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + let mut out = Vec::with_capacity(remove_leading_a_hunks_from.unwrap_or_else(|| { + if a_hunk_token_equal_till.is_some() { + 1 + } else { + 0 + } + })); + truncate_hunks_from_from_front( + a_hunks, + remove_leading_a_hunks_from, + a_hunk_token_equal_till, + Some(&mut out), + ); + truncate_hunks_from_from_front(b_hunks, remove_leading_b_hunks_from, b_hunk_token_equal_till, None); + let hunks_in_front = out.len(); + (out, hunks_in_front) + }; + + (last_a_hunk_idx, last_b_hunk_idx) = (0, 0); + { + let (mut remove_trailing_a_hunks_from, mut remove_trailing_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_from, mut b_hunk_token_equal_from) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks_rev(a_hunks).zip(iterate_hunks_rev(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + a_hunk_token_equal_from = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_from = None; + last_b_hunk_idx = b_hunk_idx; + } + + if a_line == b_line { + (remove_trailing_a_hunks_from, remove_trailing_b_hunks_from) = + (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_from, b_hunk_token_equal_from) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + truncate_hunks_from_from_back( + a_hunks, + remove_trailing_a_hunks_from, + a_hunk_token_equal_from, + Some(&mut out), + ); + 
truncate_hunks_from_from_back(b_hunks, remove_trailing_b_hunks_from, b_hunk_token_equal_from, None); + } + + (out, hunks_in_front) + } + + /// Return an iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. + fn iterate_hunks(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) + } + + /// Return a reverse iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. + fn iterate_hunks_rev(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().rev().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .rev() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) + } + + fn write_hunks( + hunks: &[Hunk], + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + out: &mut Vec, + ) { + for hunk in hunks { + let (tokens, range) = match hunk.side { + Side::Current => (current_tokens, &hunk.after), + Side::Other => (input.after.as_slice(), &hunk.after), + Side::Ancestor => (input.before.as_slice(), &hunk.before), + }; + write_tokens(&input.interner, &tokens[usize_range(range)], out); + } + } + + fn usize_range(range: &Range) -> Range { + range.start as usize..range.end as usize + } + + fn write_tokens( + interner: &imara_diff::intern::Interner<&[u8]>, + tokens: &[imara_diff::intern::Token], + out: &mut Vec, + ) { + for token in tokens { + out.extend_from_slice(interner[*token]); + } + } + + /// Find all hunks in `iter` which aren't from the same side as `hunk` and intersect with it. + /// Return `true` if `out` is non-empty after the operation, indicating overlapping hunks were found. 
+ fn take_intersecting(hunk: &Hunk, iter: impl Iterator, out: &mut Vec) -> bool { + let mut iter = iter.peekable(); + out.clear(); + + while iter + .peek() + .filter(|b_hunk| { + b_hunk.side != hunk.side + && (hunk.before.contains(&b_hunk.before.start) + || (hunk.before.is_empty() && hunk.before.start == b_hunk.before.start)) + }) + .is_some() + { + out.extend(iter.next()); + } + !out.is_empty() + } + + fn tokens(input: &[u8]) -> imara_diff::sources::ByteLines<'_, true> { + imara_diff::sources::byte_lines_with_terminator(input) + } + + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + enum Side { + Current, + Other, + /// A special marker that is just used to be able to mix-in hunks that only point to the ancestor. + /// Only `before` matters then. + Ancestor, + } + + #[derive(Debug, Clone)] + struct Hunk { + before: Range, + after: Range, + side: Side, + } + + struct CollectHunks { + hunks: Vec, + side: Side, + } + + impl imara_diff::Sink for CollectHunks { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + self.hunks.push(Hunk { + before, + after, + side: self.side, + }); + } + + fn finish(self) -> Self::Out { + self.hunks + } + } } } +pub use text::function::merge as text; diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs index 497b9bf887..6b6175ee40 100644 --- a/gix-merge/src/blob/platform.rs +++ b/gix-merge/src/blob/platform.rs @@ -137,7 +137,7 @@ pub mod merge { pub other: ResourceRef<'parent>, } - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Options { /// If `true`, the resources being merged are contained in a virtual ancestor, /// which is the case when merge bases are merged into one. 
diff --git a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar new file mode 100644 index 0000000000000000000000000000000000000000..d32d4b29b6fb3d9987dc406156c3c0ac6f96ce78 GIT binary patch literal 84480 zcmeHQ-H+q85%+8KR|v?%B}Kg}>N81u1%fth9}4v43S1fl2dyo8)!Ne5mUDMS(Z9W< zK1q=h^+Bq(cf1d4Q{)UaGe2=O9Fh-@EA1Z(e*({2k^lJ9{-2j2%k`0ETeg#54;tmR0o;9%~q8X=ugnTCe{cF+(O_tIB z<2K&yoQGp{+DBWu*1YdT|Gw+CabNwmY~-z2{3F*2aQ%5~hqZp?__vJT79W$fg!FPc zKRg}MC*wUb-rL4|$9V4=|0zDLx=-ObD(WJ`HXOq>-WDIywrcvj;?ruHJx6iH$TNm% z3~DKq0IT&J`Rhv(r-{O+fDHeZZMX67Ik*h)uTaq)q^uI(6-)ty9fG>kViy1PQ(x)b z%JFZt@$Yz!5BOKA1u?M-|Hb$Y=od@reoI;W*Pfix8Mb_hJ5T3ERu@FP#FkN6-9$pRk1X=JR0j zU%da9UVyR)fDHeF@!xY{{(ru52T*|Ve}ItgxXQym&UD`%!T-4YFDnSJ{}1_px;3*} z5&b`&qTN{|tMU3rR)7%bU!zVc(zX5{vv9A;2qx$s`7Y#tRyxx8ztR756mInxz!d%C z0RZr?Q}HG3T>snXyg9`$8BY7t!IJ2|FaHy;zMM{ZANE||EkkL7o{&v`~qkm&TI0h zKbz}yiXZGJ@};zg_3o4&-*RSRd};nAPH>CtIm}G5hAuNd#@Vy^=F2x`c#6zTdOSu+ zc7DJ$|CpZ4(0n{!UJhIHDZ;h)X}UGf zQD&w)Orh)yUOkd!(J48bUtN-t10iuVaxnlqP!}ot^m8<+I!@q^sdbaRyxiJ5$QU#r~aOmed?8oiSkNNxG<1vgA z07<8~?(Ra4|MZDGnDxB;V~I`h|L_7(2j2e|ur^x%kDA^CCg|UG@cJK!|LZpZM*lD2 z=ARl^PSU>(@qZ1f;7I2B-=xVd-gDPf70CQQn~(n?8^QV?Rr2qn^V23hML!!~H1WS( zdcvE5{DNNsaX8p~Yg)z^W{T zhw)#V(ka5U{xxm?lKcOC^FOxhwBP@oz=r%!N=@3nR;izi9n{+Z7-V_qd;I4bp7A68 z`^NZtoeIh{saFo?O^RJ$Ji|8v+~nJO^~gJe|Et!MFyzV<#C!u9ZTAj@S1U&9Mf#`e z?O&w$w>&QX?;(i)En=P`fb$rA2O+^lw6Q!B2LJVQUu{2R?|&8^>s#@EJpG4Y{!gtM zgw#a%7gmJAyOaQf|JsRDd#jW15Ah$hO6Z_Q@BdPH>m600_zoq&;J@(yRd4?y_y76v z-?IS!i&z_s|BHzEYMJo(SHAs=6#rzer+BA`o}gh4TB80E2%$+rLQhFPQ&x@d+Rh|69P^CXD~p zArOq|)S)Ao|64$7>R2#J)U*AI6#s(pAD{mK@qcw%?xqCef46L2!xi$}KH!C14sjzWMFfNv^%7=?GTx854MDIF? zS03g+XAxu^Sxm9`J&Pn48z*Ze@gZ3Xh?b(wIczc6;zpQ`~5u zDW+|JxxiQgb43vYp@ryR)L}U~San*IK4u-3rjMb}0yl>!Jmuh_o;y@wV|`R1ZF^KP zV|i40V|P^H7{Wq}qiD@$&|zsRS#(;MdL|u~rJhlbMHPMEY^p47E59r*J5Dl9&7!yE z1%YLiSK6vCi^nqPu`soadM!;YD~ZMFU{=s9-H>MVWOS)WW5wMGEs$_n+iT)VpUVM! 
zjq;zAJ@!;;jHjnkd8@gt_Ecjmx2GPtl%Lya_Eb{YF`>ox@Ls%w9Z0#RE_Nd9`a0Qx z% zXkEyG7~q=hyt9szbA%6bPZMgykydT&S6Y=eX=!D~%B8g%!I!+DoXK4*oC0KY#v@jSql;{T~Jkct*tr{{fzG{4|3SG`&R$ zNbz4k?WfmjRd6luN}APw=BoM-Om3C@Btv;|4$eB z;Fb;k2RDI=cO(HR{<#?K_?wsGzdiojR^Y(-KjS4mzD0xoV0>d}aq|)whW~kM!e#ik zEpGjH5V(Nlih&%r!@L~PuRB?}L*We$b zf1?C6x@IQ2lxJmU;kkKr&DPPFOdB|E6=aT7V0d3{4|it>+5MyPa7KK zERZTp^dtUWZ4ubhn2I^%Cyyw+Um(RwphpV|GQSF5GW=UM_xulB1o2;1Q+j??`~MPT zl0$y~-;a@@ge@Vgt<)`|6%@b3DqpZqX+*AGQWo5-{Rx{fd}|s zME@}UFCylvWl|j5ptpNu{2!VBW#I!rA^%IST5{^;u^sg8m&&=y{67cBdRzXV?YlOt z|5vUFo?sRGX&>KWke$rYdh;A6`)Ip<`n-Nhv**0jUT5k0DXOo<3Ep~klOE$_U+)K2 zlB=f>OLA<(p+u{2Q(u?Lj@H>_We2Dj2Bfy|+Fz|oIVsEa67i7XA7j6bf7kM1{~wVo zRhRW)ibX38bJEJL9smF=)k;~AUi-PPgS8C*1nceb-$9W7p+gsKn$z&#rUJijRRV2C zO~-vm@sIcVY~$Z{VE<1Yx@glZ@XwC)L;dnLE%<$V5@7s)5)4+6@X->36#xAA?*tC) z|J@R!mY*>GYZ2?s5T)_20s6TdWcU~C|Ao`P0RKxEkr@BIQNh0WA04V|0tD5P&(hc+ zD7S=Whwgnda2ybvtsC_)~9gP+z$2FJkiVNrr#H`0v_~|1A=w@-i|0Vf_>%Kd*@|LS)9q6NIlUy1tsKNRr(AH;uH8S45~d3uZ;_zax3X)jk4M5UBFdm?jYX(WS~sbQ#@f$^w)6e~3H($wOqn$VqoJve7Gl{x<+0 zDL1Ho7ZPCbUqAQN-r5BGJ9q*Z@UK=4LTZ42p;%r`A%p+gol|@Ja{P1iKaPW7|6jFQ z5L2W5c5w|Ty=Mt9_%Hl_fBlvR4>`5$#czamNu{xxL*$?z|D{(BBS69(|F-%twulUIIyyomnC=6@jH zgZvMDDybbhK9Zm569m|LP|5WQiM32&cJ+zDJ%_)=WpWpx2u@LP4JxAt4+tOa#&^A!-ek4HQ zU(@XW0RNAV|GOUE6B6P-`t7W(fAzD!a{VL3?f>U^Hq8I%*ZykN`~DBF^-HMW|H^~H zKNX)*+aAH82<(Fe;?Xe!Wwc0g`hUcxkYsW8xm9t-Px<>Fm;d29KIDJQ5c%My$i|sA zf&C5oW_?NGPlMY*#ru!|kAFNKtbd74=QvFi-J%r#E_eQ$8{jh`;QddL0(!_`ZJVXG pP5Z(5_Q5m1`u^qR5Sf3%c7QzhYe)>~5g%Lv34jDZ0yisx{{c4k(ux28 literal 0 HcmV?d00001 diff --git a/gix-merge/tests/fixtures/text-baseline.sh b/gix-merge/tests/fixtures/text-baseline.sh new file mode 100644 index 0000000000..63f108f79d --- /dev/null +++ b/gix-merge/tests/fixtures/text-baseline.sh @@ -0,0 +1,207 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init +rm -Rf .git/hooks + +function baseline() { + local ours=$DIR/${1:?1: our file}.blob; + local base=$DIR/${2:?2: base file}.blob; + local theirs=$DIR/${3:?3: their file}.blob; 
+ local output=$DIR/${4:?4: the name of the output file}.merged; + + shift 4 + git merge-file --stdout "$@" "$ours" "$base" "$theirs" > "$output" || true + + echo "$ours" "$base" "$theirs" "$output" "$@" >> baseline.cases +} + +mkdir simple +(cd simple + echo -e "line1-changed-by-both\nline2-to-be-changed-in-incoming" > ours.blob + echo -e "line1-to-be-changed-by-both\nline2-to-be-changed-in-incoming" > base.blob + echo -e "line1-changed-by-both\nline2-changed" > theirs.blob +) + +# one big change includes multiple smaller ones +mkdir multi-change +(cd multi-change + cat < base.blob +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +Y +Y +8 +Z +EOF + + cat < theirs.blob +T +T +T +T +T +T +T +T +T +T +EOF +) + +# a change with deletion/clearing our file +mkdir clear-ours +(cd clear-ours + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + touch ours.blob + + cat < theirs.blob +T +T +T +T +T +EOF +) + +# a change with deletion/clearing their file +mkdir clear-theirs +(cd clear-theirs + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +O +O +O +O +O +EOF + + touch theirs.blob +) + +# differently sized changes +mkdir ours-2-lines-theirs-1-line +(cd ours-2-lines-theirs-1-line + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +EOF + + cat < theirs.blob +0 +1 +Y +3 +4 +5 +EOF +) + +# partial match +mkdir partial-match +(cd partial-match + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +X1 +X2 +X3 +X4 +5 +EOF + + cat < theirs.blob +0 +X1 +2 +X3 +X4 +5 +EOF +) + +# based on 'unique merge base' from 'diff3-conflict-markers' +mkdir unique-merge-base-with-insertion +(cd unique-merge-base-with-insertion + cat < base.blob +1 +2 +3 +4 +5 +EOF + + # no trailing newline + echo -n $'1\n2\n3\n4\n5\n7' > ours.blob + echo -n $'1\n2\n3\n4\n5\nsix' > theirs.blob +) + +for dir in simple \ + multi-change \ + clear-ours \ + clear-theirs \ + ours-2-lines-theirs-1-line \ + partial-match \ + unique-merge-base-with-insertion; 
do + DIR=$dir + baseline ours base theirs merge + baseline ours base theirs diff3 --diff3 + baseline ours base theirs zdiff3 --zdiff3 + baseline ours base theirs merge-ours --ours + baseline ours base theirs merge-theirs --theirs + baseline ours base theirs merge-union --union +done \ No newline at end of file diff --git a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs new file mode 100644 index 0000000000..42d31832cb --- /dev/null +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -0,0 +1,145 @@ +use gix_merge::blob::builtin_driver::binary::{Pick, ResolveWith}; +use gix_merge::blob::{builtin_driver, Resolution}; + +#[test] +fn binary() { + assert_eq!( + builtin_driver::binary(None), + (Pick::Ours, Resolution::Conflict), + "by default it picks ours and marks it as conflict" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ancestor)), + (Pick::Ancestor, Resolution::Complete), + "Otherwise we can pick anything and it will mark it as complete" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ours)), + (Pick::Ours, Resolution::Complete) + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Theirs)), + (Pick::Theirs, Resolution::Complete) + ); +} + +mod text { + use bstr::ByteSlice; + use gix_merge::blob::Resolution; + use pretty_assertions::assert_str_eq; + + #[test] + fn run_baseline() -> crate::Result { + let root = gix_testtools::scripted_fixture_read_only("text-baseline.sh")?; + let cases = std::fs::read_to_string(root.join("baseline.cases"))?; + let mut out = Vec::new(); + for case in baseline::Expectations::new(&root, &cases) { + let mut input = imara_diff::intern::InternedInput::default(); + dbg!(&case.name, case.options); + let actual = gix_merge::blob::builtin_driver::text( + &mut out, + &mut input, + &case.ours, + Some(case.ours_marker.as_str().as_ref()), + &case.base, + Some(case.base_marker.as_str().as_ref()), + &case.theirs, + Some(case.theirs_marker.as_str().as_ref()), + 
case.options, + ); + let expected_resolution = if case.expected.contains_str("<<<<<<<") { + Resolution::Conflict + } else { + Resolution::Complete + }; + assert_eq!(actual, expected_resolution, "{}: resolution mismatch", case.name,); + assert_str_eq!( + out.as_bstr().to_str_lossy(), + case.expected.to_str_lossy(), + "{}: output mismatch\n{}", + case.name, + out.as_bstr() + ); + } + Ok(()) + } + + mod baseline { + use bstr::BString; + use gix_merge::blob::builtin_driver::text::{ConflictStyle, ResolveWith}; + use std::path::Path; + + #[derive(Debug)] + pub struct Expectation { + pub ours: BString, + pub ours_marker: String, + pub theirs: BString, + pub theirs_marker: String, + pub base: BString, + pub base_marker: String, + pub name: BString, + pub expected: BString, + pub options: gix_merge::blob::builtin_driver::text::Options, + } + + pub struct Expectations<'a> { + root: &'a Path, + lines: std::str::Lines<'a>, + } + + impl<'a> Expectations<'a> { + pub fn new(root: &'a Path, cases: &'a str) -> Self { + Expectations { + root, + lines: cases.lines(), + } + } + } + + impl Iterator for Expectations<'_> { + type Item = Expectation; + + fn next(&mut self) -> Option { + let line = self.lines.next()?; + let mut words = line.split(' '); + let (Some(ours), Some(base), Some(theirs), Some(output)) = + (words.next(), words.next(), words.next(), words.next()) + else { + panic!("need at least the input and output") + }; + + let read = |rela_path: &str| read_blob(self.root, rela_path); + + let mut options = gix_merge::blob::builtin_driver::text::Options::default(); + for arg in words { + match arg { + "--diff3" => options.conflict_style = ConflictStyle::Diff3, + "--zdiff3" => options.conflict_style = ConflictStyle::ZealousDiff3, + "--ours" => options.on_conflict = Some(ResolveWith::Ours), + "--theirs" => options.on_conflict = Some(ResolveWith::Theirs), + "--union" => options.on_conflict = Some(ResolveWith::Union), + _ => panic!("Unknown argument to parse into options: '{arg}'"), + 
} + } + + Some(Expectation { + ours: read(ours), + ours_marker: ours.into(), + theirs: read(theirs), + theirs_marker: theirs.into(), + base: read(base), + base_marker: base.into(), + expected: read(output), + name: output.into(), + options, + }) + } + } + + fn read_blob(root: &Path, rela_path: &str) -> BString { + std::fs::read(root.join(rela_path)) + .unwrap_or_else(|_| panic!("Failed to read '{rela_path}' in '{}'", root.display())) + .into() + } + } +} diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs new file mode 100644 index 0000000000..f781f63e48 --- /dev/null +++ b/gix-merge/tests/merge/blob/mod.rs @@ -0,0 +1 @@ +mod builtin_driver; diff --git a/gix-merge/tests/merge/main.rs b/gix-merge/tests/merge/main.rs new file mode 100644 index 0000000000..05375cb227 --- /dev/null +++ b/gix-merge/tests/merge/main.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "blob")] +mod blob; + +pub use gix_testtools::Result;