From 748fafdffb6cb56493c316a64f3e5eb7b4b4e386 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 9 Sep 2024 11:42:28 +0200 Subject: [PATCH 1/6] add performance notes related to an arena-backed interned BString compatible type --- crate-status.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crate-status.md b/crate-status.md index b34dffa075..df1a09e9e4 100644 --- a/crate-status.md +++ b/crate-status.md @@ -196,6 +196,9 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] probe capabilities * [x] symlink creation and removal * [x] file snapshots +* [ ] **BString Interner with Arena-Backing and arbitrary value association** + - probably based on [`internment`](https://docs.rs/internment/latest/internment/struct.Arena.html#), + but needs `bumpalo` support to avoid item allocations/boxing, and avoid internal `Mutex`. (key type is pointer based). ### gix-fs * [x] probe capabilities * [x] symlink creation and removal * [x] file snapshots @@ -215,6 +218,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-* * [x] [name validation][tagname-validation] * [x] transform borrowed to owned objects * [x] edit trees efficiently and write changes back + - [ ] See if `gix-fs::InternedMap` improves performance. 
* [x] API documentation * [ ] Some examples From 1d3d25884c6b4fd2d1941d983433c8aefd779898 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 30 Aug 2024 09:49:29 +0200 Subject: [PATCH 2/6] add the `gix-merge` crate for capturing merge algorithms --- Cargo.lock | 4 ++++ Cargo.toml | 1 + README.md | 5 +++-- crate-status.md | 9 +++++++++ gix-merge/Cargo.toml | 18 ++++++++++++++++++ gix-merge/LICENSE-APACHE | 1 + gix-merge/LICENSE-MIT | 1 + gix-merge/src/lib.rs | 2 ++ 8 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 gix-merge/Cargo.toml create mode 120000 gix-merge/LICENSE-APACHE create mode 120000 gix-merge/LICENSE-MIT create mode 100644 gix-merge/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index ad92273b74..fbc4bc1a3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2040,6 +2040,10 @@ dependencies = [ "thiserror", ] +[[package]] +name = "gix-merge" +version = "0.0.0" + [[package]] name = "gix-negotiate" version = "0.15.0" diff --git a/Cargo.toml b/Cargo.toml index c4fe1097bc..6e5b2dfe1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -243,6 +243,7 @@ members = [ "gix-object", "gix-glob", "gix-diff", + "gix-merge", "gix-date", "gix-traverse", "gix-dir", diff --git a/README.md b/README.md index 49bbcf1150..5d5ca7e9f8 100644 --- a/README.md +++ b/README.md @@ -130,10 +130,11 @@ is usable to some extent. 
* [gix-submodule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-submodule) * [gix-status](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-status) * [gix-worktree-state](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree-state) - * `gitoxide-core` -* **very early** _(possibly without any documentation and many rough edges)_ * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) * [gix-dir](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-dir) + * `gitoxide-core` +* **very early** _(possibly without any documentation and many rough edges)_ + * [gix-merge](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-merge) * **idea** _(just a name placeholder)_ * [gix-note](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-note) * [gix-fetchhead](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-fetchhead) diff --git a/crate-status.md b/crate-status.md index df1a09e9e4..ad68671457 100644 --- a/crate-status.md +++ b/crate-status.md @@ -326,6 +326,15 @@ Check out the [performance discussion][gix-diff-performance] as well. 
* [ ] working with hunks of data * [x] API documentation * [ ] Examples + +### gix-merge + +* [ ] three-way merge analysis of blobs with choice of how to resolve conflicts + - [ ] choose how to resolve conflicts on the data-structure + - [ ] produce a new blob based on data-structure containing possible resolutions + - [ ] `merge` style + - [ ] `diff3` style + - [ ] `zdiff` style [gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74 diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml new file mode 100644 index 0000000000..2114995cf0 --- /dev/null +++ b/gix-merge/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "gix-merge" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT OR Apache-2.0" +description = "A crate of the gitoxide project implementing merge algorithms" +authors = ["Sebastian Thiel "] +edition = "2021" +rust-version = "1.65" + +[lints] +workspace = true + +[lib] +doctest = false + +[dependencies] + diff --git a/gix-merge/LICENSE-APACHE b/gix-merge/LICENSE-APACHE new file mode 120000 index 0000000000..965b606f33 --- /dev/null +++ b/gix-merge/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-merge/LICENSE-MIT b/gix-merge/LICENSE-MIT new file mode 120000 index 0000000000..76219eb72e --- /dev/null +++ b/gix-merge/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs new file mode 100644 index 0000000000..3a6cd994a5 --- /dev/null +++ b/gix-merge/src/lib.rs @@ -0,0 +1,2 @@ +#![deny(rust_2018_idioms)] +#![forbid(unsafe_code)] From ea95284de82ad9b753c9e61da6863afa72bfb2db Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 12 Sep 2024 11:06:26 +0200 Subject: [PATCH 3/6] feat: Add `blob::pipeline::WorktreeRoots::is_unset()` That way it's easy to determine if a worktree root has any root set. 
--- gix-diff/src/blob/pipeline.rs | 12 +++++++++++- gix-diff/src/blob/platform.rs | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/gix-diff/src/blob/pipeline.rs b/gix-diff/src/blob/pipeline.rs index 4501821842..b9c727e4ca 100644 --- a/gix-diff/src/blob/pipeline.rs +++ b/gix-diff/src/blob/pipeline.rs @@ -22,6 +22,7 @@ pub struct WorktreeRoots { pub new_root: Option, } +/// Access impl WorktreeRoots { /// Return the root path for the given `kind` pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { @@ -30,6 +31,11 @@ impl WorktreeRoots { ResourceKind::NewOrDestination => self.new_root.as_deref(), } } + + /// Return `true` if all worktree roots are unset. + pub fn is_unset(&self) -> bool { + self.new_root.is_none() && self.old_root.is_none() + } } /// Data as part of an [Outcome]. @@ -184,6 +190,8 @@ impl Pipeline { /// Access impl Pipeline { /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](Driver::name) to support binary searches. 
pub fn drivers(&self) -> &[super::Driver] { &self.drivers } @@ -445,7 +453,7 @@ impl Pipeline { } } .map_err(|err| { - convert_to_diffable::Error::CreateTempfile { + convert_to_diffable::Error::StreamCopy { source: err, rela_path: rela_path.to_owned(), } @@ -533,6 +541,8 @@ impl Driver { pub fn prepare_binary_to_text_cmd(&self, path: &Path) -> Option { let command: &BStr = self.binary_to_text_command.as_ref()?.as_ref(); let cmd = gix_command::prepare(gix_path::from_bstr(command).into_owned()) + // TODO: Add support for an actual Context, validate it *can* match Git + .with_context(Default::default()) .with_shell() .stdin(Stdio::null()) .stdout(Stdio::piped()) diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 6a550bc2dc..4c540cce85 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -184,7 +184,7 @@ pub mod prepare_diff { use crate::blob::platform::Resource; - /// The kind of operation that was performed during the [`diff`](super::Platform::prepare_diff()) operation. + /// The kind of operation that should be performed based on the configuration of the resources involved in the diff. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Operation<'a> { /// The [internal diff algorithm](imara_diff::diff) should be called with the provided arguments. 
From 865282f5f2a6e4613b2a93dd1b41af2b0b2e7757 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 12 Sep 2024 11:11:39 +0200 Subject: [PATCH 4/6] use new `WorktreeRoot` API provided by `gix-diff` --- gix/src/repository/diff.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gix/src/repository/diff.rs b/gix/src/repository/diff.rs index e2efb11ec1..4f98ebe52f 100644 --- a/gix/src/repository/diff.rs +++ b/gix/src/repository/diff.rs @@ -38,10 +38,10 @@ impl Repository { mode, self.attributes_only( &index, - if worktree_roots.new_root.is_some() || worktree_roots.old_root.is_some() { - gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping - } else { + if worktree_roots.is_unset() { gix_worktree::stack::state::attributes::Source::IdMapping + } else { + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping }, )? .inner, From b96d11fbd299fdc76ebc9a07fc75fd41721c38b3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 30 Aug 2024 09:57:16 +0200 Subject: [PATCH 5/6] Sketch the entire API surface to capture all parts of blob-merges --- Cargo.lock | 16 + gix-diff/src/blob/platform.rs | 1 + gix-merge/Cargo.toml | 26 ++ gix-merge/src/blob/builtin_driver.rs | 156 ++++++++++ gix-merge/src/blob/mod.rs | 154 +++++++++ gix-merge/src/blob/pipeline.rs | 436 ++++++++++++++++++++++++++ gix-merge/src/blob/platform.rs | 447 +++++++++++++++++++++++++++ gix-merge/src/lib.rs | 4 + 8 files changed, 1240 insertions(+) create mode 100644 gix-merge/src/blob/builtin_driver.rs create mode 100644 gix-merge/src/blob/mod.rs create mode 100644 gix-merge/src/blob/pipeline.rs create mode 100644 gix-merge/src/blob/platform.rs diff --git a/Cargo.lock b/Cargo.lock index fbc4bc1a3a..47d8945ec5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2043,6 +2043,22 @@ dependencies = [ [[package]] name = "gix-merge" version = "0.0.0" +dependencies = [ + "bstr", + "document-features", + "gix-command", + "gix-filter", + "gix-fs 0.11.3", + "gix-hash 0.14.2", 
+ "gix-object 0.44.0", + "gix-path 0.10.11", + "gix-tempfile 14.0.2", + "gix-trace 0.1.10", + "gix-worktree 0.36.0", + "imara-diff", + "serde", + "thiserror", +] [[package]] name = "gix-negotiate" diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 4c540cce85..495d23bd43 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -383,6 +383,7 @@ impl Platform { /// /// If one of the resources is binary, the operation reports an error as such resources don't make their data available /// which is required for the external diff to run. + // TODO: fix this - the diff shouldn't fail if binary (or large) files are used, just copy them into tempfiles. pub fn prepare_diff_command( &self, diff_command: BString, diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index 2114995cf0..b75d4cb384 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -14,5 +14,31 @@ workspace = true [lib] doctest = false +[features] +default = ["blob"] +## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. +blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace"] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
+serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] + [dependencies] +gix-hash = { version = "^0.14.2", path = "../gix-hash" } +gix-object = { version = "^0.44.0", path = "../gix-object" } +gix-filter = { version = "^0.13.0", path = "../gix-filter", optional = true } +gix-worktree = { version = "^0.36.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true } +gix-command = { version = "^0.3.9", path = "../gix-command", optional = true } +gix-path = { version = "^0.10.11", path = "../gix-path", optional = true } +gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true } +gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true } +gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true } + +thiserror = "1.0.63" +imara-diff = { version = "0.1.7", optional = true } +bstr = { version = "1.5.0", default-features = false } +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } + +document-features = { version = "0.2.0", optional = true } +[package.metadata.docs.rs] +all-features = true +features = ["document-features"] diff --git a/gix-merge/src/blob/builtin_driver.rs b/gix-merge/src/blob/builtin_driver.rs new file mode 100644 index 0000000000..cacef327ac --- /dev/null +++ b/gix-merge/src/blob/builtin_driver.rs @@ -0,0 +1,156 @@ +use crate::blob::BuiltinDriver; + +impl BuiltinDriver { + /// Return the name of this instance. + pub fn as_str(&self) -> &str { + match self { + BuiltinDriver::Text => "text", + BuiltinDriver::Binary => "binary", + BuiltinDriver::Union => "union", + } + } + + /// Get all available built-in drivers. + pub fn all() -> &'static [Self] { + &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union] + } + + /// Try to match one of our variants to `name`, case-sensitive, and return its instance. 
+ pub fn by_name(name: &str) -> Option { + Self::all().iter().find(|variant| variant.as_str() == name).copied() + } +} + +/// +pub mod binary { + use crate::blob::Resolution; + + /// What to do when having to pick a side to resolve a conflict. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ResolveWith { + /// Chose the ancestor to resolve a conflict. + Ancestor, + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, + } + + /// Tell the caller of [`merge()`] which side was picked + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, + } + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. + pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(ResolveWith::Ours) => (Pick::Ours, Resolution::Complete), + Some(ResolveWith::Theirs) => (Pick::Theirs, Resolution::Complete), + Some(ResolveWith::Ancestor) => (Pick::Ancestor, Resolution::Complete), + } + } +} + +/// +pub mod text { + use crate::blob::Resolution; + + /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express + /// merge conflicts in the resulting file. + #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ConflictStyle { + /// Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, + /// hiding the base version entirely. 
+ /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + ///``` + #[default] + Merge, + /// Show non-minimized hunks of local changes, the base, and the incoming (other) changes. + /// + /// This mode does not hide any information. + /// ``` + /// <<<<<<< local + /// line1-changed-by-both + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line1-changed-by-both + /// line2-changed + /// >>>>>>> incoming + ///``` + Diff3, + /// Like [`Diff3](Self::Diff3), but will show *minimized* hunks of local change and the incoming (other) changes, + /// as well as non-minimized hunks of the base. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + /// ``` + ZealousDiff3, + } + + /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Options { + /// How to visualize conflicts in merged files. + pub conflict_style: ConflictStyle, + /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` + pub marker_size: usize, + /// Decide what to do to automatically resolve conflicts. + /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. + pub on_conflict: Option, + } + + impl Default for Options { + fn default() -> Self { + Options { + conflict_style: Default::default(), + marker_size: 7, + on_conflict: None, + } + } + } + + /// What to do to resolve a conflict. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ResolveWith { + /// Chose our side to resolve a conflict. 
+ Ours, + /// Chose their side to resolve a conflict. + Theirs, + /// Place our and their lines one after another, in any order + Union, + } + + /// Merge `current` and `other` with `ancestor` as base according to `opts`. + /// + /// Place the merged result in `out` and return the resolution. + pub fn merge(_out: &mut Vec, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution { + todo!("text merge"); + } +} diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs new file mode 100644 index 0000000000..f14a517d5e --- /dev/null +++ b/gix-merge/src/blob/mod.rs @@ -0,0 +1,154 @@ +// TODO: remove this - only needed while &mut Vec isn't used. +#![allow(clippy::ptr_arg)] + +use bstr::BString; +use std::path::PathBuf; + +/// +pub mod builtin_driver; +/// +pub mod pipeline; +/// +pub mod platform; + +/// Identify a merge resolution. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Resolution { + /// Everything could be resolved during the merge. + Complete, + /// A conflict is still present. + Conflict, +} + +/// A way to classify a resource suitable for merging. +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub enum ResourceKind { + /// Our side of the state. + CurrentOrOurs, + /// Their side of the state. + OtherOrTheirs, + /// The state of the common base of both ours and theirs. + CommonAncestorOrBase, +} + +/// Define a driver program that merges +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum BuiltinDriver { + /// Perform a merge between text-sources such that conflicts are marked according to + /// `merge.conflictStyle` in the Git configuration. + /// + /// If any of the inputs, *base*, *ours* or *theirs* looks like non-text/binary, + /// the [`Binary`](Self::Binary) driver will be used instead. + /// + /// Also see [`builtin_driver::text::ConflictStyle`]. 
+ #[default] + Text, + /// Merge 'unmergable' content by choosing *ours* or *theirs*, without performing + /// an actual merge. + /// + /// Note that if the merge operation is for virtual ancestor (a merge for merge-bases), + /// then *ours* will always be chosen. + Binary, + /// Merge text-sources and resolve conflicts by adding conflicting lines one after another, + /// in random order, without adding conflict markers either. + /// + /// This can be useful for files that change a lot, but will remain usable merely by adding + /// all changed lines. + Union, +} + +/// Define a driver program that merges +/// +/// Some values are related to diffing, some are related to conversions. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Driver { + /// The name of the driver, as referred to by `[merge "name"]` in the git configuration. + pub name: BString, + /// The human-readable version of `name`, only to be used for displaying driver-information to the user. + pub display_name: BString, + /// The command to execute to perform the merge entirely like ` %O %A %B %L %P %S %X %Y`. + /// + /// * **%O** + /// - the common ancestor version, or *base*. + /// * **%A** + /// - the current version, or *ours*. + /// * **%B** + /// - the other version, or *theirs*. + /// * **%L** + /// - The conflict-marker size as positive number. + /// * **%P** + /// - The path in which the merged result will be stored. + /// * **%S** + /// - The conflict-label for the common ancestor or *base*. + /// * **%X** + /// - The conflict-label for the current version or *ours*. + /// * **%Y** + /// - The conflict-label for the other version or *theirs*. + /// + /// Note that conflict-labels are behind the conflict markers, to annotate them. 
+ /// + /// A typical invocation with all arguments substituted could then look like this: + /// + /// ``` + /// .merge_file_nR2Qs1 .merge_file_WYXCJe .merge_file_UWbzrm 7 file e2a2970 HEAD feature + /// ``` + pub command: BString, + /// If `true`, this is the `name` of the driver to use when a virtual-merge-base is created, as a merge of all + /// available merge-bases if there are more than one. + /// + /// This value can also be special built-in drivers named `text`, `binary` or `union`. Note that user-defined + /// drivers with the same name will be preferred over built-in ones, but only for files whose git attributes + /// specified the driver by *name*. + pub recursive: Option, +} + +/// A conversion pipeline to take an object or path from what's stored in Git to what can be merged, while +/// following the guidance of git-attributes at the respective path to learn how the merge should be performed. +/// +/// Depending on the source, different conversions are performed: +/// +/// * `worktree on disk` -> `object for storage in git` +/// * `object` -> `possibly renormalized object` +/// - Renormalization means that the `object` is converted to what would be checked out into the work-tree, +/// just to turn it back into an object. +#[derive(Clone)] +pub struct Pipeline { + /// A way to read data directly from the worktree. + pub roots: pipeline::WorktreeRoots, + /// A pipeline to convert objects from the worktree to Git, and also from Git to the worktree, and back to Git. + pub filter: gix_filter::Pipeline, + /// Options affecting the way we read files. + pub options: pipeline::Options, + /// All available merge drivers. + /// + /// They are referenced in git-attributes by name, and we hand out indices into this array. + drivers: Vec, + /// Pre-configured attributes to obtain additional merge-related information. + attrs: gix_filter::attributes::search::Outcome, + /// A buffer to produce disk-accessible paths from worktree roots. 
+ path: PathBuf, +} + +/// A utility for gathering and processing all state necessary to perform a three-way merge. +/// +/// It can re-use buffers if all three parts of participating in the merge are +/// set repeatedly. +#[derive(Clone)] +pub struct Platform { + /// The current version (ours). + current: Option, + /// The ancestor version (base). + ancestor: Option, + /// The other version (theirs). + other: Option, + + /// A way to convert objects into a diff-able format. + pub filter: Pipeline, + /// A way to access `.gitattributes` + pub attr_stack: gix_worktree::Stack, + + /// The way we convert resources into mergeable states. + filter_mode: pipeline::Mode, +} diff --git a/gix-merge/src/blob/pipeline.rs b/gix-merge/src/blob/pipeline.rs new file mode 100644 index 0000000000..90adb61505 --- /dev/null +++ b/gix-merge/src/blob/pipeline.rs @@ -0,0 +1,436 @@ +use super::{BuiltinDriver, Pipeline, ResourceKind}; +use bstr::{BStr, ByteSlice}; +use gix_filter::attributes; +use gix_filter::driver::apply::{Delay, MaybeDelayed}; +use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome}; +use gix_object::tree::EntryKind; +use std::io::Read; +use std::path::{Path, PathBuf}; + +/// Options for use in a [`Pipeline`]. +#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// The amount of bytes that an object has to reach before being treated as binary. + /// These objects will not be queried, nor will their data be processed in any way. + /// If `0`, no file is ever considered binary due to their size. + /// + /// Note that for files stored in `git`, what counts is their stored, decompressed size, + /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets + /// them. + /// However, if they are to be retrieved from the worktree, the worktree size is what matters, + /// even though that also might be a `git-lfs` file which is small in Git. 
+ pub large_file_threshold_bytes: u64, + /// Capabilities of the file system which affect how we read worktree files. + pub fs: gix_fs::Capabilities, + /// Define which driver to use if the `merge` attribute for a resource is unspecified. + /// + /// This is the value of the `merge.default` git configuration. + pub default_driver: Option, +} + +/// The specific way to convert a resource. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Mode { + /// Prepare resources as they are stored in `git`. + /// + /// This is naturally the case when object-ids are used, but a conversion is needed + /// when data is read from a worktree. + #[default] + ToGit, + /// For sources that are object-ids, convert them to what *would* be stored in the worktree, + /// and back to what *would* be stored in Git. + /// + /// Sources that are located in a worktree are merely converted to what *would* be stored in Git. + /// + /// This is useful to prevent merge conflicts due to inconcistent whitespace. + Renormalize, +} + +/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree. +#[derive(Clone, Debug, Default)] +pub struct WorktreeRoots { + /// The worktree root where the current (or our) version of the resource is present. + pub current_root: Option, + /// The worktree root where the other (or their) version of the resource is present. + pub other_root: Option, + /// The worktree root where containing the resource of the common ancestor of our and their version. 
+ pub common_ancestor_root: Option, +} + +impl WorktreeRoots { + /// Return the root path for the given `kind` + pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { + match kind { + ResourceKind::CurrentOrOurs => self.current_root.as_deref(), + ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(), + ResourceKind::OtherOrTheirs => self.other_root.as_deref(), + } + } + + /// Return `true` if all worktree roots are unset. + pub fn is_unset(&self) -> bool { + self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none() + } +} + +/// Lifecycle +impl Pipeline { + /// Create a new instance of a pipeline which produces blobs suitable for merging. + /// + /// `roots` allow to read worktree files directly, and `worktree_filter` is used + /// to transform object database data directly. `drivers` further configure individual paths. + /// `options` are used to further configure the way we act.. + pub fn new( + roots: WorktreeRoots, + worktree_filter: gix_filter::Pipeline, + mut drivers: Vec, + options: Options, + ) -> Self { + drivers.sort_by(|a, b| a.name.cmp(&b.name)); + Pipeline { + roots, + filter: worktree_filter, + drivers, + options, + attrs: { + let mut out = gix_filter::attributes::search::Outcome::default(); + out.initialize_with_selection(&Default::default(), Some("merge")); + out + }, + path: Default::default(), + } + } +} + +/// Access +impl Pipeline { + /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](super::Driver::name) to support binary searches. + pub fn drivers(&self) -> &[super::Driver] { + &self.drivers + } +} + +/// Data as part of an [Outcome]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub enum Data { + /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`]. 
+ Buffer, + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be merged. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in git, and once it's the size of file + /// in the worktree - both can differ a lot depending on filters. + size: u64, + }, +} + +/// The selection of the driver to use by a resource obtained with [`Pipeline::convert_to_mergeable()`]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] +pub enum DriverChoice { + /// Use the given built-in driver to perform the merge. + BuiltIn(BuiltinDriver), + /// Use the user-provided driver program using the index into [the pipelines driver array](Pipeline::drivers(). + Index(usize), +} + +impl Default for DriverChoice { + fn default() -> Self { + DriverChoice::BuiltIn(Default::default()) + } +} + +/// The outcome returned by [Pipeline::convert_to_mergeable()]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub struct Outcome { + /// If available, an index into the `drivers` field to access more diff-related information of the driver for items + /// at the given path, as previously determined by git-attributes. + /// + /// * `merge` is set + /// - Use the [`BuiltinDriver::Text`] + /// * `-merge` is unset + /// - Use the [`BuiltinDriver::Binary`] + /// * `!merge` is unspecified + /// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. + /// * `merge=name` + /// - Search for a user-configured or built-in driver called `name`. + /// - If not found, silently default to [`BuiltinDriver::Text`] + /// + /// Note that drivers are queried even if there is no object available. 
+ pub driver: DriverChoice, + /// The data itself, suitable for diffing, and if the object or worktree item is present at all. + /// Otherwise, it's `None`. + pub data: Option, +} + +/// +pub mod convert_to_mergeable { + use std::collections::TryReserveError; + + use bstr::BString; + use gix_object::tree::EntryKind; + + /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")] + InvalidEntryKind { rela_path: BString, actual: EntryKind }, + #[error("Entry at '{rela_path}' could not be read as symbolic link")] + ReadLink { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be opened for reading or read from")] + OpenOrRead { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")] + StreamCopy { rela_path: BString, source: std::io::Error }, + #[error(transparent)] + FindObject(#[from] gix_object::find::existing_object::Error), + #[error(transparent)] + ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error), + #[error(transparent)] + ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error), + #[error("Memory allocation failed")] + OutOfMemory(#[from] TryReserveError), + } +} + +/// Conversion +impl Pipeline { + /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`. + /// The resulting merge-able data is written into `out`, if it's not too large or considered binary. + /// The returned [`Outcome`] contains information on how to use `out`, or if it's filled at all. + /// + /// `attributes` must be returning the attributes at `rela_path`, and `objects` must be usable if `kind` is + /// a resource in the object database, i.e. 
if no worktree root is available. It's notable that if a worktree root + /// is present for `kind`, then a `rela_path` is used to access it on disk. + /// + /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case + /// [a root](WorktreeRoots) is present, then `out` will be left cleared and [Outcome::data] will be `None`. + /// This is useful to simplify the calling code as empty buffers signal that nothing is there. + /// + /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode. + /// Only blobs are allowed. + /// + /// Use `convert` to control what kind of the resource will be produced. + #[allow(clippy::too_many_arguments)] + pub fn convert_to_mergeable( + &mut self, + id: &gix_hash::oid, + mode: EntryKind, + rela_path: &BStr, + kind: ResourceKind, + attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome), + objects: &dyn gix_object::FindObjectOrHeader, + convert: Mode, + out: &mut Vec, + ) -> Result { + if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) { + return Err(convert_to_mergeable::Error::InvalidEntryKind { + rela_path: rela_path.to_owned(), + actual: mode, + }); + } + + out.clear(); + attributes(rela_path, &mut self.attrs); + let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); + let driver = match attr.assignment.state { + attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), + attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), + attributes::StateRef::Value(name) => { + let name = name.as_bstr(); + self.drivers + .binary_search_by(|d| d.name.as_bstr().cmp(name)) + .ok() + .map(DriverChoice::Index) + .or_else(|| { + name.to_str() + .ok() + .and_then(BuiltinDriver::by_name) + .map(DriverChoice::BuiltIn) + }) + .unwrap_or_default() + } + attributes::StateRef::Unspecified => self + .options + .default_driver + 
.map(DriverChoice::BuiltIn) + .unwrap_or_default(), + }; + match self.roots.by_kind(kind) { + Some(root) => { + self.path.clear(); + self.path.push(root); + self.path.push(gix_path::from_bstr(rela_path)); + let size_in_bytes = (self.options.large_file_threshold_bytes > 0) + .then(|| { + none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + }) + }) + .transpose()?; + let data = match size_in_bytes { + Some(None) => None, // missing as identified by the size check + Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::Binary { size }), + _ => { + let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + + if let Some(file) = file { + match convert { + Mode::ToGit | Mode::Renormalize => { + let res = self.filter.convert_to_git( + file, + gix_path::from_bstr(rela_path).as_ref(), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(mut file) => { + file.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Process(mut stream) => { + stream.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + } + } + + Some(if is_binary_buf(out) { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + }) + } else { + None + } + } + }; + Ok(Outcome { driver, data }) + } + None => { + let data = if id.is_null() { + None + } else { + let header = objects + .try_header(id) + 
.map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + let is_binary = self.options.large_file_threshold_bytes > 0 + && header.size > self.options.large_file_threshold_bytes; + let data = if is_binary { + Data::Binary { size: header.size } + } else { + objects + .try_find(id, out) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + + if convert == Mode::Renormalize { + let res = self + .filter + .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + + match res { + ToWorktreeOutcome::Unchanged(_) => {} + ToWorktreeOutcome::Buffer(src) => { + out.clear(); + out.try_reserve(src.len())?; + out.extend_from_slice(src); + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, out).map_err(|err| { + convert_to_mergeable::Error::StreamCopy { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + }; + } + + let res = self.filter.convert_to_git( + &**out, + &gix_path::from_bstr(rela_path), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(_) => {} + ToGitOutcome::Process(mut stream) => { + stream + .read_to_end(out) + .map_err(|err| convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + + if is_binary_buf(out) { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + } + }; + Some(data) + }; + Ok(Outcome { driver, data }) + } + } + } +} + +fn none_if_missing(res: std::io::Result) -> std::io::Result> { + match res { + Ok(data) 
=> Ok(Some(data)),
+ Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
+ Err(err) => Err(err),
+ }
+}
+
+fn is_binary_buf(buf: &[u8]) -> bool {
+ let buf = &buf[..buf.len().min(8000)];
+ buf.contains(&0)
+}
diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs
new file mode 100644
index 0000000000..497b9bf887
--- /dev/null
+++ b/gix-merge/src/blob/platform.rs
@@ -0,0 +1,447 @@
+use bstr::{BStr, BString};
+
+use crate::blob::pipeline::DriverChoice;
+use crate::blob::{pipeline, Pipeline, Platform, ResourceKind};
+
+/// A stored value representing a resource that participates in a merge.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
+pub(super) struct Resource {
+ /// The `id` of the value, or `null` if it's only living in a worktree.
+ id: gix_hash::ObjectId,
+ /// The repository-relative path where the resource lives in the tree.
+ rela_path: BString,
+ /// The outcome of converting a resource into a mergeable format using [Pipeline::convert_to_mergeable()].
+ conversion: pipeline::Outcome,
+ /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`.
+ mode: gix_object::tree::EntryKind,
+ /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary
+ /// or the resource doesn't exist.
+ buffer: Vec<u8>,
+}
+
+/// A blob or executable ready to be merged in one way or another.
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct ResourceRef<'a> {
+ /// The data itself, suitable for merging, and if the object or worktree item is present at all.
+ pub data: resource::Data<'a>,
+ /// The location of the resource, relative to the working tree.
+ pub rela_path: &'a BStr,
+ /// Which driver to use according to the resource's configuration.
+ pub driver_choice: DriverChoice, + /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at + /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which + /// after its 'to-git' conversion never had its hash computed. + pub id: &'a gix_hash::oid, +} + +/// +pub mod resource { + use crate::blob::{ + pipeline, + platform::{Resource, ResourceRef}, + }; + + impl<'a> ResourceRef<'a> { + pub(super) fn new(cache: &'a Resource) -> Self { + ResourceRef { + data: cache.conversion.data.map_or(Data::Missing, |data| match data { + pipeline::Data::Buffer => Data::Buffer(&cache.buffer), + pipeline::Data::Binary { size } => Data::Binary { size }, + }), + driver_choice: cache.conversion.driver, + rela_path: cache.rela_path.as_ref(), + id: &cache.id, + } + } + } + + /// The data of a mergeable resource, as it could be determined and computed previously. + #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] + pub enum Data<'a> { + /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. + Missing, + /// The textual data as processed and ready for merging, i.e. suitable for storage in Git. + Buffer(&'a [u8]), + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be merged. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in Git, and once it's the size of file + /// in the worktree. + size: u64, + }, + } + + impl<'a> Data<'a> { + /// Return ourselves as slice of bytes if this instance stores data. 
+ pub fn as_slice(&self) -> Option<&'a [u8]> { + match self { + Data::Buffer(d) => Some(d), + Data::Binary { .. } | Data::Missing => None, + } + } + } +} + +/// +pub mod set_resource { + use bstr::BString; + + use crate::blob::{pipeline, ResourceKind}; + + /// The error returned by [Platform::set_resource](super::Platform::set_resource). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Can only diff blobs, not {mode:?}")] + InvalidMode { mode: gix_object::tree::EntryKind }, + #[error("Failed to read {kind:?} worktree data from '{rela_path}'")] + Io { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error(transparent)] + ConvertToMergeable(#[from] pipeline::convert_to_mergeable::Error), + } +} + +/// +pub mod merge { + use crate::blob::pipeline::DriverChoice; + use crate::blob::platform::ResourceRef; + use crate::blob::{builtin_driver, BuiltinDriver, Driver, Resolution}; + use bstr::BString; + + /// The product of a [`prepare_merge()`](crate::blob::Platform::prepare_merge_state()) call to finally + /// perform the merge and retrieve the merge results. + #[derive(Copy, Clone)] + pub struct State<'parent> { + /// The platform that hosts the resources, used to access drivers. + pub(super) parent: &'parent super::Platform, + /// The current or our side of the merge operation. + pub current: ResourceRef<'parent>, + /// The ancestor or base of the merge operation. + pub ancestor: ResourceRef<'parent>, + /// The other or their side of the merge operation. + pub other: ResourceRef<'parent>, + } + + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Options { + /// If `true`, the resources being merged are contained in a virtual ancestor, + /// which is the case when merge bases are merged into one. 
+ pub is_virtual_ancestor: bool,
+ /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible and it picks a side.
+ pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
+ /// Options for the builtin [text driver](BuiltinDriver::Text).
+ pub text: builtin_driver::text::Options,
+ }
+
+ ///
+ pub mod prepare_external_driver {
+ use std::ops::{Deref, DerefMut};
+
+ use crate::blob::ResourceKind;
+ use bstr::BString;
+
+ /// The error returned by [State::prepare_external_driver()](super::State::prepare_external_driver()).
+ #[derive(Debug, thiserror::Error)]
+ #[allow(missing_docs)]
+ pub enum Error {
+ #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")]
+ SourceOrDestinationAreBinary,
+ #[error(
+ "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
+ )]
+ CreateTempfile {
+ rela_path: BString,
+ kind: ResourceKind,
+ source: std::io::Error,
+ },
+ #[error(
+ "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
+ )]
+ WriteTempfile {
+ rela_path: BString,
+ kind: ResourceKind,
+ source: std::io::Error,
+ },
+ }
+
+ /// The product of a [`prepare_external_driver`](super::State::prepare_external_driver()) operation.
+ ///
+ /// This type acts like [`std::process::Command`], ready to run, with `stderr` set to *inherit*,
+ /// but `stdin` closed and `stdout` set up to be captured.
+ // TODO: remove this
+ #[allow(dead_code)]
+ pub struct Command {
+ /// The pre-configured command
+ cmd: std::process::Command,
+ /// A tempfile holding the *current* (ours) state of the resource.
+ current: gix_tempfile::Handle,
+ /// A tempfile holding the *ancestor* (base) state of the resource.
+ ancestor: gix_tempfile::Handle,
+ /// A tempfile holding the *other* (their) state of the resource.
+ other: gix_tempfile::Handle, + } + + impl Deref for Command { + type Target = std::process::Command; + + fn deref(&self) -> &Self::Target { + &self.cmd + } + } + + impl DerefMut for Command { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.cmd + } + } + } + + /// + pub mod builtin_merge { + /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](super::State::builtin_merge). + pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, + /// New data was produced with the result of the merge, to be found in the buffer that was passed to + /// [builtin_merge()](super::State::builtin_merge). + Buffer, + } + } + + /// The error returned by [State::merge()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + PrepareExternalDriver(#[from] prepare_external_driver::Error), + } + + /// Plumbing + impl<'parent> State<'parent> { + /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources, + /// prepare the invocation and temporary files needed to launch it according to protocol. + /// + /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge. + /// + /// ### Deviation + /// + /// We allow passing more context than Git would by taking a whole `context`, it's up to the caller to decide how much is filled. + pub fn prepare_external_driver( + &self, + _merge_command: BString, + _context: gix_command::Context, + ) -> Result { + todo!("prepare command") + } + + /// Perform the merge according to our resources and + /// Note that if the *pick* wasn't [`Buffer`](builtin_merge::Pick::Buffer), then `out` will not have been cleared. 
+ pub fn builtin_merge( + &self, + _out: &mut Vec, + _driver: BuiltinDriver, + _opts: Options, + ) -> (builtin_merge::Pick, Resolution) { + todo!("do full merge") + } + + /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err` + /// with the built-in driver to use instead. + pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> { + match self.current.driver_choice { + DriverChoice::BuiltIn(builtin) => Err(builtin), + DriverChoice::Index(idx) => self.parent.filter.drivers.get(idx).ok_or(BuiltinDriver::default()), + } + } + } + + /// Convenience + impl<'parent> State<'parent> { + /// Perform the merge, possibly invoking an external merge command, and store the result in `out`. + /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`. + pub fn merge( + &self, + _out: &mut Vec, + _opts: Options, + _context: gix_command::Context, + ) -> Result { + match self.configured_driver() { + Ok(driver) => { + let _cmd = self.prepare_external_driver(driver.command.clone(), _context)?; + todo!("invoke command and copy result") + } + Err(_builtin) => { + todo!("call builtins and copy results") + } + } + } + } +} + +/// +pub mod prepare_merge { + /// The error returned by [Platform::prepare_merge()](super::Platform::prepare_merge_state()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The 'current', 'ancestor' or 'other' resource for the merge operation were not set")] + UnsetResource, + #[error("Tried to merge 'current' and 'other' where at least one of them is removed")] + CurrentOrOtherRemoved, + } +} + +/// Lifecycle +impl Platform { + /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able. + /// `filter_mode` decides how to do that specifically. + /// Use `attr_stack` to access attributes pertaining worktree filters and merge settings. 
+ pub fn new(filter: Pipeline, filter_mode: pipeline::Mode, attr_stack: gix_worktree::Stack) -> Self { + Platform { + current: None, + ancestor: None, + other: None, + filter, + filter_mode, + attr_stack, + } + } +} + +/// Preparation +impl Platform { + /// Store enough information about a resource to eventually use it in a merge, where… + /// + /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either + /// be a resource in the worktree, or it's considered a non-existing, deleted object. + /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided + /// for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root + /// is available for `kind`. + /// * `mode` is the kind of object (only blobs and links are allowed) + /// * `rela_path` is the relative path as seen from the (work)tree root. + /// * `kind` identifies the side of the merge this resource will be used for. + /// * `objects` provides access to the object database in case the resource can't be read from a worktree. + pub fn set_resource( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + self.set_resource_inner(id, mode, rela_path, kind, objects) + } + + /// Returns the resource of the given kind if it was set. + pub fn resource(&self, kind: ResourceKind) -> Option> { + let cache = match kind { + ResourceKind::CurrentOrOurs => self.current.as_ref(), + ResourceKind::CommonAncestorOrBase => self.ancestor.as_ref(), + ResourceKind::OtherOrTheirs => self.other.as_ref(), + }?; + ResourceRef::new(cache).into() + } + + /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources. 
+ pub fn prepare_merge_state(&self) -> Result, prepare_merge::Error> { + let current = self.current.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let ancestor = self.ancestor.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let other = self.other.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + + let out = merge::State { + parent: self, + current: ResourceRef::new(current), + ancestor: ResourceRef::new(ancestor), + other: ResourceRef::new(other), + }; + + match (current.conversion.data, other.conversion.data) { + (None, None) => Err(prepare_merge::Error::CurrentOrOtherRemoved), + (_, _) => Ok(out), + } + } +} + +impl Platform { + fn set_resource_inner( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + if !matches!( + mode, + gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable + ) { + return Err(set_resource::Error::InvalidMode { mode }); + } + let entry = + self.attr_stack + .at_entry(rela_path, None, objects) + .map_err(|err| set_resource::Error::Attributes { + source: err, + kind, + rela_path: rela_path.to_owned(), + })?; + + let storage = match kind { + ResourceKind::OtherOrTheirs => &mut self.other, + ResourceKind::CommonAncestorOrBase => &mut self.ancestor, + ResourceKind::CurrentOrOurs => &mut self.current, + }; + + let mut buf_storage = Vec::new(); + let out = self.filter.convert_to_mergeable( + &id, + mode, + rela_path, + kind, + &mut |_, out| { + let _ = entry.matching_attributes(out); + }, + objects, + self.filter_mode, + storage.as_mut().map_or(&mut buf_storage, |s| &mut s.buffer), + )?; + + match storage { + None => { + *storage = Some(Resource { + id, + rela_path: rela_path.to_owned(), + conversion: out, + mode, + buffer: buf_storage, + }); + } + Some(storage) => { + storage.id = id; + storage.rela_path = rela_path.to_owned(); + 
storage.conversion = out; + storage.mode = mode; + } + }; + Ok(()) + } +} diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs index 3a6cd994a5..8e608c53ab 100644 --- a/gix-merge/src/lib.rs +++ b/gix-merge/src/lib.rs @@ -1,2 +1,6 @@ #![deny(rust_2018_idioms)] #![forbid(unsafe_code)] + +/// +#[cfg(feature = "blob")] +pub mod blob; From b09092c545f35555d806ce69d54fda7da9b9e9b8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 15 Sep 2024 09:32:19 +0200 Subject: [PATCH 6/6] Implement `text` and `binary` merge algorithms, also with baseline tests for correctness. --- Cargo.lock | 2 + gix-merge/Cargo.toml | 4 + gix-merge/src/blob/builtin_driver.rs | 667 +++++++++++++++++- gix-merge/src/blob/platform.rs | 2 +- .../generated-archives/text-baseline.tar | Bin 0 -> 84480 bytes gix-merge/tests/fixtures/text-baseline.sh | 207 ++++++ gix-merge/tests/merge/blob/builtin_driver.rs | 145 ++++ gix-merge/tests/merge/blob/mod.rs | 1 + gix-merge/tests/merge/main.rs | 4 + 9 files changed, 1011 insertions(+), 21 deletions(-) create mode 100644 gix-merge/tests/fixtures/generated-archives/text-baseline.tar create mode 100644 gix-merge/tests/fixtures/text-baseline.sh create mode 100644 gix-merge/tests/merge/blob/builtin_driver.rs create mode 100644 gix-merge/tests/merge/blob/mod.rs create mode 100644 gix-merge/tests/merge/main.rs diff --git a/Cargo.lock b/Cargo.lock index 47d8945ec5..5912a30127 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2053,9 +2053,11 @@ dependencies = [ "gix-object 0.44.0", "gix-path 0.10.11", "gix-tempfile 14.0.2", + "gix-testtools", "gix-trace 0.1.10", "gix-worktree 0.36.0", "imara-diff", + "pretty_assertions", "serde", "thiserror", ] diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index b75d4cb384..6d8da01014 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -39,6 +39,10 @@ serde = { version = "1.0.114", optional = true, default-features = false, featur document-features = { version = "0.2.0", optional = true } +[dev-dependencies] 
+gix-testtools = { path = "../tests/tools" } +pretty_assertions = "1.4.0" + [package.metadata.docs.rs] all-features = true features = ["document-features"] diff --git a/gix-merge/src/blob/builtin_driver.rs b/gix-merge/src/blob/builtin_driver.rs index cacef327ac..36bf78395a 100644 --- a/gix-merge/src/blob/builtin_driver.rs +++ b/gix-merge/src/blob/builtin_driver.rs @@ -23,8 +23,6 @@ impl BuiltinDriver { /// pub mod binary { - use crate::blob::Resolution; - /// What to do when having to pick a side to resolve a conflict. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum ResolveWith { @@ -36,7 +34,7 @@ pub mod binary { Theirs, } - /// Tell the caller of [`merge()`] which side was picked + /// Tell the caller of [`merge()`](function::merge) which side was picked. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum Pick { /// Chose the ancestor. @@ -47,23 +45,32 @@ pub mod binary { Theirs, } - /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. - /// - /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. - pub fn merge(on_conflict: Option) -> (Pick, Resolution) { - match on_conflict { - None => (Pick::Ours, Resolution::Conflict), - Some(ResolveWith::Ours) => (Pick::Ours, Resolution::Complete), - Some(ResolveWith::Theirs) => (Pick::Theirs, Resolution::Complete), - Some(ResolveWith::Ancestor) => (Pick::Ancestor, Resolution::Complete), + pub(super) mod function { + use crate::blob::builtin_driver::binary::{Pick, ResolveWith}; + use crate::blob::Resolution; + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. 
+ pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(resolve) => ( + match resolve { + ResolveWith::Ours => Pick::Ours, + ResolveWith::Theirs => Pick::Theirs, + ResolveWith::Ancestor => Pick::Ancestor, + }, + Resolution::Complete, + ), + } } } } +pub use binary::function::merge as binary; /// pub mod text { - use crate::blob::Resolution; - /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express /// merge conflicts in the resulting file. #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] @@ -115,8 +122,11 @@ pub mod text { } /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Options { + /// Determine of the diff will be performed. + /// Defaults to [`imara_diff::Algorithm::Myers`]. + pub diff_algorithm: imara_diff::Algorithm, /// How to visualize conflicts in merged files. pub conflict_style: ConflictStyle, /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` @@ -132,10 +142,17 @@ pub mod text { conflict_style: Default::default(), marker_size: 7, on_conflict: None, + diff_algorithm: imara_diff::Algorithm::Myers, } } } + impl Options { + fn resolves_with_theirs_or_ours(&self) -> bool { + matches!(self.on_conflict, Some(ResolveWith::Ours | ResolveWith::Theirs)) + } + } + /// What to do to resolve a conflict. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum ResolveWith { @@ -147,10 +164,620 @@ pub mod text { Union, } - /// Merge `current` and `other` with `ancestor` as base according to `opts`. - /// - /// Place the merged result in `out` and return the resolution. 
- pub fn merge(_out: &mut Vec, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution { - todo!("text merge"); + pub(super) mod function { + use crate::blob::builtin_driver::text::{ConflictStyle, Options, ResolveWith}; + use crate::blob::Resolution; + use bstr::{BStr, ByteSlice, ByteVec}; + use std::ops::Range; + + /// Merge `current` and `other` with `ancestor` as base according to `opts`. + /// Use `current_label`, `other_label` and `ancestor_label` to annotate conflict sections. + /// + /// `input` is for reusing memory of lists of tokens, and `other_tokens` is memory + /// for storing tokens for `other`. + /// Place the merged result in `out` (cleared before use) and return the resolution. + /// + /// # Important + /// + /// *The caller* is responsible for clearing it, otherwise tokens will accumulate. + /// This idea is to save time if the input is known to be very similar. + #[allow(clippy::too_many_arguments)] + pub fn merge<'a>( + out: &mut Vec, + input: &mut imara_diff::intern::InternedInput<&'a [u8]>, + current: &'a [u8], + current_label: Option<&BStr>, + ancestor: &'a [u8], + ancestor_label: Option<&BStr>, + other: &'a [u8], + other_label: Option<&BStr>, + opts: Options, + ) -> Resolution { + out.clear(); + input.update_before(tokens(ancestor)); + input.update_after(tokens(current)); + + let current_hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Current, + hunks: Default::default(), + }, + ); + + let current_tokens = std::mem::take(&mut input.after); + input.update_after(tokens(other)); + + let mut hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Other, + hunks: current_hunks, + }, + ); + + hunks.sort_by(|a, b| a.before.start.cmp(&b.before.start)); + let mut hunks = hunks.into_iter().filter(|h| match opts.on_conflict { + Some(ResolveWith::Ours) => h.side == Side::Current, + Some(ResolveWith::Theirs) => h.side == Side::Other, + _ => true, + }); + let 
mut intersecting = Vec::new(); + let mut ancestor_integrated_until = 0; + let mut resolution = Resolution::Complete; + // TODO(performance): instead of skipping hunks, let's not compute these ones at all, but only once all tests are there. + let resolves_with_theirs_or_ours = opts.resolves_with_theirs_or_ours(); + let mut filled_hunks = Vec::with_capacity(2); + while let Some(hunk) = hunks.next() { + if !resolves_with_theirs_or_ours && take_intersecting(&hunk, &mut hunks, &mut intersecting) { + fill_ancestor(&hunk.before, &mut intersecting); + + let filled_hunks_side = hunk.side; + filled_hunks.clear(); + filled_hunks.push(hunk); + fill_ancestor( + &intersecting + .first() + .zip(intersecting.last()) + .map(|(f, l)| f.before.start..l.before.end) + .expect("at least one entry"), + &mut filled_hunks, + ); + match opts.on_conflict { + None => { + let (hunks_front_and_back, num_hunks_front) = match opts.conflict_style { + ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => zealously_contract_hunks( + &mut filled_hunks, + &mut intersecting, + input, + ¤t_tokens, + ), + ConflictStyle::Diff3 => (Vec::new(), 0), + }; + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + // TODO: dedup preamble, simplify this - we know that our and their hunks aren't empty. 
+ let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + let last_hunk = back_hunks.last().or(their_hunks.last()).expect("at least one hunk"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + + write_hunks(front_hunks, input, ¤t_tokens, out); + if their_hunks.is_empty() { + // TODO: assure we run into this + write_hunks(our_hunks, input, ¤t_tokens, out); + } else if our_hunks.is_empty() { + // TODO: assure we run into this + write_hunks(their_hunks, input, ¤t_tokens, out); + } else { + resolution = Resolution::Conflict; + let our_nl = detect_line_ending(our_hunks, input, ¤t_tokens); + let their_nl = detect_line_ending(their_hunks, input, ¤t_tokens); + match opts.conflict_style { + ConflictStyle::Merge => { + write_conflict_marker(out, b'<', current_label, opts.marker_size, our_nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, opts.marker_size, their_nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, opts.marker_size, their_nl); + } + ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { + write_conflict_marker(out, b'<', current_label, opts.marker_size, our_nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + let ancestor_hunk = Hunk { + before: first_hunk.before.start..last_hunk.before.end, + after: Default::default(), + side: Side::Ancestor, + }; + let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); + let ancestor_nl = detect_line_ending(ancestor_hunk, input, ¤t_tokens); + write_conflict_marker(out, b'|', ancestor_label, opts.marker_size, ancestor_nl); + write_hunks(ancestor_hunk, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, opts.marker_size, their_nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, 
opts.marker_size, their_nl); + } + } + } + write_hunks(back_hunks, input, ¤t_tokens, out); + // TODO: have a sample that validates this! + ancestor_integrated_until = last_hunk.before.end; + } + Some(resolve) => { + match resolve { + ResolveWith::Ours | ResolveWith::Theirs => { + unreachable!("we should have chosen to integrate the hunks directly") + } + ResolveWith::Union => { + let (hunks_front_and_back, num_hunks_front) = zealously_contract_hunks( + &mut filled_hunks, + &mut intersecting, + input, + ¤t_tokens, + ); + + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + write_ancestor( + input, + ancestor_integrated_until, + first_hunk.before.start as usize, + out, + ); + write_hunks(front_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending(front_hunks, input, ¤t_tokens)); + write_hunks(our_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending(our_hunks, input, ¤t_tokens)); + write_hunks(their_hunks, input, ¤t_tokens, out); + if !back_hunks.is_empty() { + assure_ends_with_nl( + out, + detect_line_ending(their_hunks, input, ¤t_tokens), + ); + } + write_hunks(back_hunks, input, ¤t_tokens, out); + // TODO: have a sample that validates this! 
+ ancestor_integrated_until = back_hunks + .last() + .or(their_hunks.last()) + .expect("at least one hunk") + .before + .end; + } + }; + } + } + } else { + write_ancestor(input, ancestor_integrated_until, hunk.before.start as usize, out); + ancestor_integrated_until = hunk.before.end; + write_hunks(std::slice::from_ref(&hunk), input, ¤t_tokens, out); + } + } + write_ancestor(input, ancestor_integrated_until, input.after.len(), out); + + resolution + } + + // TODO: find actual line ending based on hunks + fn detect_line_ending( + _hunks: &[Hunk], + _input: &mut imara_diff::intern::InternedInput<&[u8]>, + _current_tokens: &[imara_diff::intern::Token], + ) -> &'static BStr { + b"\n".into() + } + + fn assure_ends_with_nl(out: &mut Vec, nl: &BStr) { + if !out.is_empty() && !out.ends_with(b"\n") { + out.push_str(nl); + } + } + + fn write_conflict_marker(out: &mut Vec, marker: u8, label: Option<&BStr>, marker_size: usize, nl: &BStr) { + assure_ends_with_nl(out, nl); + out.extend(std::iter::repeat(marker).take(marker_size)); + if let Some(label) = label { + out.push(b' '); + out.extend_from_slice(label); + } + out.push_str(nl); + } + + fn write_ancestor(input: &imara_diff::intern::InternedInput<&[u8]>, from: u32, to: usize, out: &mut Vec) { + if to < from as usize { + return; + } + if let Some(tokens) = input.before.get(from as usize..to) { + write_tokens(&input.interner, tokens, out); + } + } + + /// Look at all hunks in `in_out` and fill in the ancestor in the range of `ancestor_range`. + /// This is all based on knowing the ranges are sequences of tokens. 
+ fn fill_ancestor(Range { start, end }: &Range, in_out: &mut Vec) { + if in_out.is_empty() { + return; + } + + fn ancestor_hunk(start: u32, num_lines: u32) -> Hunk { + let range = start..start + num_lines; + Hunk { + before: range.clone(), + after: range, + side: Side::Ancestor, + } + } + + fn is_nonzero(num: &u32) -> bool { + *num > 0 + } + + let first = &in_out[0]; + let mut first_idx = 0; + if let Some(lines_to_add) = first.before.start.checked_sub(*start).filter(is_nonzero) { + in_out.insert(0, ancestor_hunk(*start, lines_to_add)); + first_idx += 1; + } + + let mut added_hunks = false; + for (idx, next_idx) in (first_idx..in_out.len()).map(|idx| (idx, idx + 1)) { + let Some(next_hunk) = in_out.get(next_idx) else { break }; + let hunk = &in_out[idx]; + if let Some(lines_to_add) = next_hunk.after.start.checked_sub(hunk.after.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(hunk.after.end, lines_to_add)); + added_hunks = true; + } + } + let in_out_len = in_out.len(); + if added_hunks { + in_out[first_idx..in_out_len].sort_by_key(|hunk| hunk.before.start); + } + + let last = &in_out[in_out_len - 1]; + if let Some(lines_to_add) = end.checked_sub(last.before.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(last.before.end, lines_to_add)); + } + } + + /// Reduce the area of `a_hunks` and the hunks in `b_hunks` so that only those lines that are + /// actually different remain. Note that we have to compare the resolved values, not only the tokens, + /// so `current_tokens` is expected to be known to the `input` (and its `interner`). + /// Hunks from all input arrays maybe removed in the process from the front and back, in case they + /// are entirely equal to what's in `hunk`. Note also that `a_hunks` and `b_hunks` are treated to be consecutive, + /// so [`fill_ancestor()`] must have been called beforehand, and are assumed to covert the same space in the + /// ancestor buffer. + /// Use `mode` to determine how hunks may be handled. 
+ /// + /// Return a new vector of all the hunks that were removed from front and back, with partial hunks inserted, + /// along with the amount of hunks that go front, with the remaining going towards the back. + // TODO: refactor so hunks and their associated data can go into an array for easier handling. + #[must_use] + fn zealously_contract_hunks( + a_hunks: &mut Vec, + b_hunks: &mut Vec, + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + ) -> (Vec, usize) { + let line_content = |token_idx: u32, side: Side| { + let tokens = match side { + Side::Current => current_tokens, + Side::Other => &input.after, + Side::Ancestor => &input.before, + }; + &input.interner[tokens[token_idx as usize]] + }; + fn range_by_side(hunk: &mut Hunk) -> &mut Range { + match hunk.side { + Side::Current | Side::Other => &mut hunk.after, + Side::Ancestor => &mut hunk.before, + } + } + fn truncate_hunks_from_from_front( + hunks: &mut Vec, + hunks_to_remove_until_idx: Option, + hunk_token_equal_till: Option, + mut out_hunks: Option<&mut Vec>, + ) { + let Some(hunks_to_remove_until_idx) = hunks_to_remove_until_idx else { + assert!(hunk_token_equal_till.is_none()); + return; + }; + let mut last_index_to_remove = Some(hunks_to_remove_until_idx); + let hunk = &mut hunks[hunks_to_remove_until_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_till) = hunk_token_equal_till { + let orig_start = range.start; + let new_start = hunk_token_equal_till + 1; + range.start = new_start; + if Range::::is_empty(range) { + range.start = orig_start; + } else if let Some(out) = out_hunks.as_deref_mut() { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = orig_start; + new_range.end = new_start; + + out.push(removed_hunk); + } else { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + } + } + if 
let Some(last_index_to_remove) = last_index_to_remove { + let mut current_idx = 0; + hunks.retain(|hunk| { + if current_idx > last_index_to_remove { + true + } else { + current_idx += 1; + if let Some(out) = out_hunks.as_deref_mut() { + out.push(hunk.clone()); + } + false + } + }); + } + } + + fn truncate_hunks_from_from_back( + hunks: &mut Vec, + remove_trailing_hunks_from_idx: Option, + hunk_token_equal_from: Option, + mut out_hunks: Option<&mut Vec>, + ) { + let Some(mut remove_trailing_hunks_from_idx) = remove_trailing_hunks_from_idx else { + assert!(hunk_token_equal_from.is_none()); + return; + }; + + let hunk = &mut hunks[remove_trailing_hunks_from_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_from) = hunk_token_equal_from { + let orig_end = range.end; + let new_end = hunk_token_equal_from; + range.end = new_end; + if Range::::is_empty(range) { + range.end = orig_end; + } else if let Some(out) = out_hunks.as_deref_mut() { + remove_trailing_hunks_from_idx += 1; + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = new_end; + new_range.end = orig_end; + + out.push(removed_hunk); + } else { + remove_trailing_hunks_from_idx += 1; + } + } + if let Some(out) = out_hunks { + out.extend_from_slice(&hunks[remove_trailing_hunks_from_idx..]); + } + hunks.truncate(remove_trailing_hunks_from_idx); + } + + let (mut last_a_hunk_idx, mut last_b_hunk_idx) = (0, 0); + let (mut out, hunks_in_front) = { + let (mut remove_leading_a_hunks_from, mut remove_leading_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_till, mut b_hunk_token_equal_till) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks(a_hunks).zip(iterate_hunks(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + 
a_hunk_token_equal_till = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_till = None; + last_b_hunk_idx = b_hunk_idx; + } + if a_line == b_line { + (remove_leading_a_hunks_from, remove_leading_b_hunks_from) = + (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_till, b_hunk_token_equal_till) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + let mut out = Vec::with_capacity(remove_leading_a_hunks_from.unwrap_or_else(|| { + if a_hunk_token_equal_till.is_some() { + 1 + } else { + 0 + } + })); + truncate_hunks_from_from_front( + a_hunks, + remove_leading_a_hunks_from, + a_hunk_token_equal_till, + Some(&mut out), + ); + truncate_hunks_from_from_front(b_hunks, remove_leading_b_hunks_from, b_hunk_token_equal_till, None); + let hunks_in_front = out.len(); + (out, hunks_in_front) + }; + + (last_a_hunk_idx, last_b_hunk_idx) = (0, 0); + { + let (mut remove_trailing_a_hunks_from, mut remove_trailing_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_from, mut b_hunk_token_equal_from) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks_rev(a_hunks).zip(iterate_hunks_rev(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + a_hunk_token_equal_from = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_from = None; + last_b_hunk_idx = b_hunk_idx; + } + + if a_line == b_line { + (remove_trailing_a_hunks_from, remove_trailing_b_hunks_from) = + (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_from, b_hunk_token_equal_from) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + truncate_hunks_from_from_back( + a_hunks, + remove_trailing_a_hunks_from, + a_hunk_token_equal_from, + Some(&mut out), + ); + 
truncate_hunks_from_from_back(b_hunks, remove_trailing_b_hunks_from, b_hunk_token_equal_from, None); + } + + (out, hunks_in_front) + } + + /// Return an iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. + fn iterate_hunks(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) + } + + /// Return a reverse iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. + fn iterate_hunks_rev(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().rev().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .rev() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) + } + + fn write_hunks( + hunks: &[Hunk], + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + out: &mut Vec, + ) { + for hunk in hunks { + let (tokens, range) = match hunk.side { + Side::Current => (current_tokens, &hunk.after), + Side::Other => (input.after.as_slice(), &hunk.after), + Side::Ancestor => (input.before.as_slice(), &hunk.before), + }; + write_tokens(&input.interner, &tokens[usize_range(range)], out); + } + } + + fn usize_range(range: &Range) -> Range { + range.start as usize..range.end as usize + } + + fn write_tokens( + interner: &imara_diff::intern::Interner<&[u8]>, + tokens: &[imara_diff::intern::Token], + out: &mut Vec, + ) { + for token in tokens { + out.extend_from_slice(interner[*token]); + } + } + + /// Find all hunks in `iter` which aren't from the same side as `hunk` and intersect with it. + /// Return `true` if `out` is non-empty after the operation, indicating overlapping hunks were found. 
+ fn take_intersecting(hunk: &Hunk, iter: impl Iterator, out: &mut Vec) -> bool { + let mut iter = iter.peekable(); + out.clear(); + + while iter + .peek() + .filter(|b_hunk| { + b_hunk.side != hunk.side + && (hunk.before.contains(&b_hunk.before.start) + || (hunk.before.is_empty() && hunk.before.start == b_hunk.before.start)) + }) + .is_some() + { + out.extend(iter.next()); + } + !out.is_empty() + } + + fn tokens(input: &[u8]) -> imara_diff::sources::ByteLines<'_, true> { + imara_diff::sources::byte_lines_with_terminator(input) + } + + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + enum Side { + Current, + Other, + /// A special marker that is just used to be able to mix-in hunks that only point to the ancestor. + /// Only `before` matters then. + Ancestor, + } + + #[derive(Debug, Clone)] + struct Hunk { + before: Range, + after: Range, + side: Side, + } + + struct CollectHunks { + hunks: Vec, + side: Side, + } + + impl imara_diff::Sink for CollectHunks { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + self.hunks.push(Hunk { + before, + after, + side: self.side, + }); + } + + fn finish(self) -> Self::Out { + self.hunks + } + } } } +pub use text::function::merge as text; diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs index 497b9bf887..6b6175ee40 100644 --- a/gix-merge/src/blob/platform.rs +++ b/gix-merge/src/blob/platform.rs @@ -137,7 +137,7 @@ pub mod merge { pub other: ResourceRef<'parent>, } - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Options { /// If `true`, the resources being merged are contained in a virtual ancestor, /// which is the case when merge bases are merged into one. 
diff --git a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar new file mode 100644 index 0000000000000000000000000000000000000000..d32d4b29b6fb3d9987dc406156c3c0ac6f96ce78 GIT binary patch literal 84480 zcmeHQ-H+q85%+8KR|v?%B}Kg}>N81u1%fth9}4v43S1fl2dyo8)!Ne5mUDMS(Z9W< zK1q=h^+Bq(cf1d4Q{)UaGe2=O9Fh-@EA1Z(e*({2k^lJ9{-2j2%k`0ETeg#54;tmR0o;9%~q8X=ugnTCe{cF+(O_tIB z<2K&yoQGp{+DBWu*1YdT|Gw+CabNwmY~-z2{3F*2aQ%5~hqZp?__vJT79W$fg!FPc zKRg}MC*wUb-rL4|$9V4=|0zDLx=-ObD(WJ`HXOq>-WDIywrcvj;?ruHJx6iH$TNm% z3~DKq0IT&J`Rhv(r-{O+fDHeZZMX67Ik*h)uTaq)q^uI(6-)ty9fG>kViy1PQ(x)b z%JFZt@$Yz!5BOKA1u?M-|Hb$Y=od@reoI;W*Pfix8Mb_hJ5T3ERu@FP#FkN6-9$pRk1X=JR0j zU%da9UVyR)fDHeF@!xY{{(ru52T*|Ve}ItgxXQym&UD`%!T-4YFDnSJ{}1_px;3*} z5&b`&qTN{|tMU3rR)7%bU!zVc(zX5{vv9A;2qx$s`7Y#tRyxx8ztR756mInxz!d%C z0RZr?Q}HG3T>snXyg9`$8BY7t!IJ2|FaHy;zMM{ZANE||EkkL7o{&v`~qkm&TI0h zKbz}yiXZGJ@};zg_3o4&-*RSRd};nAPH>CtIm}G5hAuNd#@Vy^=F2x`c#6zTdOSu+ zc7DJ$|CpZ4(0n{!UJhIHDZ;h)X}UGf zQD&w)Orh)yUOkd!(J48bUtN-t10iuVaxnlqP!}ot^m8<+I!@q^sdbaRyxiJ5$QU#r~aOmed?8oiSkNNxG<1vgA z07<8~?(Ra4|MZDGnDxB;V~I`h|L_7(2j2e|ur^x%kDA^CCg|UG@cJK!|LZpZM*lD2 z=ARl^PSU>(@qZ1f;7I2B-=xVd-gDPf70CQQn~(n?8^QV?Rr2qn^V23hML!!~H1WS( zdcvE5{DNNsaX8p~Yg)z^W{T zhw)#V(ka5U{xxm?lKcOC^FOxhwBP@oz=r%!N=@3nR;izi9n{+Z7-V_qd;I4bp7A68 z`^NZtoeIh{saFo?O^RJ$Ji|8v+~nJO^~gJe|Et!MFyzV<#C!u9ZTAj@S1U&9Mf#`e z?O&w$w>&QX?;(i)En=P`fb$rA2O+^lw6Q!B2LJVQUu{2R?|&8^>s#@EJpG4Y{!gtM zgw#a%7gmJAyOaQf|JsRDd#jW15Ah$hO6Z_Q@BdPH>m600_zoq&;J@(yRd4?y_y76v z-?IS!i&z_s|BHzEYMJo(SHAs=6#rzer+BA`o}gh4TB80E2%$+rLQhFPQ&x@d+Rh|69P^CXD~p zArOq|)S)Ao|64$7>R2#J)U*AI6#s(pAD{mK@qcw%?xqCef46L2!xi$}KH!C14sjzWMFfNv^%7=?GTx854MDIF? zS03g+XAxu^Sxm9`J&Pn48z*Ze@gZ3Xh?b(wIczc6;zpQ`~5u zDW+|JxxiQgb43vYp@ryR)L}U~San*IK4u-3rjMb}0yl>!Jmuh_o;y@wV|`R1ZF^KP zV|i40V|P^H7{Wq}qiD@$&|zsRS#(;MdL|u~rJhlbMHPMEY^p47E59r*J5Dl9&7!yE z1%YLiSK6vCi^nqPu`soadM!;YD~ZMFU{=s9-H>MVWOS)WW5wMGEs$_n+iT)VpUVM! 
zjq;zAJ@!;;jHjnkd8@gt_Ecjmx2GPtl%Lya_Eb{YF`>ox@Ls%w9Z0#RE_Nd9`a0Qx z% zXkEyG7~q=hyt9szbA%6bPZMgykydT&S6Y=eX=!D~%B8g%!I!+DoXK4*oC0KY#v@jSql;{T~Jkct*tr{{fzG{4|3SG`&R$ zNbz4k?WfmjRd6luN}APw=BoM-Om3C@Btv;|4$eB z;Fb;k2RDI=cO(HR{<#?K_?wsGzdiojR^Y(-KjS4mzD0xoV0>d}aq|)whW~kM!e#ik zEpGjH5V(Nlih&%r!@L~PuRB?}L*We$b zf1?C6x@IQ2lxJmU;kkKr&DPPFOdB|E6=aT7V0d3{4|it>+5MyPa7KK zERZTp^dtUWZ4ubhn2I^%Cyyw+Um(RwphpV|GQSF5GW=UM_xulB1o2;1Q+j??`~MPT zl0$y~-;a@@ge@Vgt<)`|6%@b3DqpZqX+*AGQWo5-{Rx{fd}|s zME@}UFCylvWl|j5ptpNu{2!VBW#I!rA^%IST5{^;u^sg8m&&=y{67cBdRzXV?YlOt z|5vUFo?sRGX&>KWke$rYdh;A6`)Ip<`n-Nhv**0jUT5k0DXOo<3Ep~klOE$_U+)K2 zlB=f>OLA<(p+u{2Q(u?Lj@H>_We2Dj2Bfy|+Fz|oIVsEa67i7XA7j6bf7kM1{~wVo zRhRW)ibX38bJEJL9smF=)k;~AUi-PPgS8C*1nceb-$9W7p+gsKn$z&#rUJijRRV2C zO~-vm@sIcVY~$Z{VE<1Yx@glZ@XwC)L;dnLE%<$V5@7s)5)4+6@X->36#xAA?*tC) z|J@R!mY*>GYZ2?s5T)_20s6TdWcU~C|Ao`P0RKxEkr@BIQNh0WA04V|0tD5P&(hc+ zD7S=Whwgnda2ybvtsC_)~9gP+z$2FJkiVNrr#H`0v_~|1A=w@-i|0Vf_>%Kd*@|LS)9q6NIlUy1tsKNRr(AH;uH8S45~d3uZ;_zax3X)jk4M5UBFdm?jYX(WS~sbQ#@f$^w)6e~3H($wOqn$VqoJve7Gl{x<+0 zDL1Ho7ZPCbUqAQN-r5BGJ9q*Z@UK=4LTZ42p;%r`A%p+gol|@Ja{P1iKaPW7|6jFQ z5L2W5c5w|Ty=Mt9_%Hl_fBlvR4>`5$#czamNu{xxL*$?z|D{(BBS69(|F-%twulUIIyyomnC=6@jH zgZvMDDybbhK9Zm569m|LP|5WQiM32&cJ+zDJ%_)=WpWpx2u@LP4JxAt4+tOa#&^A!-ek4HQ zU(@XW0RNAV|GOUE6B6P-`t7W(fAzD!a{VL3?f>U^Hq8I%*ZykN`~DBF^-HMW|H^~H zKNX)*+aAH82<(Fe;?Xe!Wwc0g`hUcxkYsW8xm9t-Px<>Fm;d29KIDJQ5c%My$i|sA zf&C5oW_?NGPlMY*#ru!|kAFNKtbd74=QvFi-J%r#E_eQ$8{jh`;QddL0(!_`ZJVXG pP5Z(5_Q5m1`u^qR5Sf3%c7QzhYe)>~5g%Lv34jDZ0yisx{{c4k(ux28 literal 0 HcmV?d00001 diff --git a/gix-merge/tests/fixtures/text-baseline.sh b/gix-merge/tests/fixtures/text-baseline.sh new file mode 100644 index 0000000000..63f108f79d --- /dev/null +++ b/gix-merge/tests/fixtures/text-baseline.sh @@ -0,0 +1,207 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init +rm -Rf .git/hooks + +function baseline() { + local ours=$DIR/${1:?1: our file}.blob; + local base=$DIR/${2:?2: base file}.blob; + local theirs=$DIR/${3:?3: their file}.blob; 
+ local output=$DIR/${4:?4: the name of the output file}.merged; + + shift 4 + git merge-file --stdout "$@" "$ours" "$base" "$theirs" > "$output" || true + + echo "$ours" "$base" "$theirs" "$output" "$@" >> baseline.cases +} + +mkdir simple +(cd simple + echo -e "line1-changed-by-both\nline2-to-be-changed-in-incoming" > ours.blob + echo -e "line1-to-be-changed-by-both\nline2-to-be-changed-in-incoming" > base.blob + echo -e "line1-changed-by-both\nline2-changed" > theirs.blob +) + +# one big change includes multiple smaller ones +mkdir multi-change +(cd multi-change + cat < base.blob +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +Y +Y +8 +Z +EOF + + cat < theirs.blob +T +T +T +T +T +T +T +T +T +T +EOF +) + +# a change with deletion/clearing our file +mkdir clear-ours +(cd clear-ours + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + touch ours.blob + + cat < theirs.blob +T +T +T +T +T +EOF +) + +# a change with deletion/clearing their file +mkdir clear-theirs +(cd clear-theirs + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +O +O +O +O +O +EOF + + touch theirs.blob +) + +# differently sized changes +mkdir ours-2-lines-theirs-1-line +(cd ours-2-lines-theirs-1-line + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +EOF + + cat < theirs.blob +0 +1 +Y +3 +4 +5 +EOF +) + +# partial match +mkdir partial-match +(cd partial-match + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +X1 +X2 +X3 +X4 +5 +EOF + + cat < theirs.blob +0 +X1 +2 +X3 +X4 +5 +EOF +) + +# based on 'unique merge base' from 'diff3-conflict-markers' +mkdir unique-merge-base-with-insertion +(cd unique-merge-base-with-insertion + cat < base.blob +1 +2 +3 +4 +5 +EOF + + # no trailing newline + echo -n $'1\n2\n3\n4\n5\n7' > ours.blob + echo -n $'1\n2\n3\n4\n5\nsix' > theirs.blob +) + +for dir in simple \ + multi-change \ + clear-ours \ + clear-theirs \ + ours-2-lines-theirs-1-line \ + partial-match \ + unique-merge-base-with-insertion; 
do + DIR=$dir + baseline ours base theirs merge + baseline ours base theirs diff3 --diff3 + baseline ours base theirs zdiff3 --zdiff3 + baseline ours base theirs merge-ours --ours + baseline ours base theirs merge-theirs --theirs + baseline ours base theirs merge-union --union +done \ No newline at end of file diff --git a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs new file mode 100644 index 0000000000..42d31832cb --- /dev/null +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -0,0 +1,145 @@ +use gix_merge::blob::builtin_driver::binary::{Pick, ResolveWith}; +use gix_merge::blob::{builtin_driver, Resolution}; + +#[test] +fn binary() { + assert_eq!( + builtin_driver::binary(None), + (Pick::Ours, Resolution::Conflict), + "by default it picks ours and marks it as conflict" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ancestor)), + (Pick::Ancestor, Resolution::Complete), + "Otherwise we can pick anything and it will mark it as complete" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ours)), + (Pick::Ours, Resolution::Complete) + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Theirs)), + (Pick::Theirs, Resolution::Complete) + ); +} + +mod text { + use bstr::ByteSlice; + use gix_merge::blob::Resolution; + use pretty_assertions::assert_str_eq; + + #[test] + fn run_baseline() -> crate::Result { + let root = gix_testtools::scripted_fixture_read_only("text-baseline.sh")?; + let cases = std::fs::read_to_string(root.join("baseline.cases"))?; + let mut out = Vec::new(); + for case in baseline::Expectations::new(&root, &cases) { + let mut input = imara_diff::intern::InternedInput::default(); + dbg!(&case.name, case.options); + let actual = gix_merge::blob::builtin_driver::text( + &mut out, + &mut input, + &case.ours, + Some(case.ours_marker.as_str().as_ref()), + &case.base, + Some(case.base_marker.as_str().as_ref()), + &case.theirs, + Some(case.theirs_marker.as_str().as_ref()), + 
case.options, + ); + let expected_resolution = if case.expected.contains_str("<<<<<<<") { + Resolution::Conflict + } else { + Resolution::Complete + }; + assert_eq!(actual, expected_resolution, "{}: resolution mismatch", case.name,); + assert_str_eq!( + out.as_bstr().to_str_lossy(), + case.expected.to_str_lossy(), + "{}: output mismatch\n{}", + case.name, + out.as_bstr() + ); + } + Ok(()) + } + + mod baseline { + use bstr::BString; + use gix_merge::blob::builtin_driver::text::{ConflictStyle, ResolveWith}; + use std::path::Path; + + #[derive(Debug)] + pub struct Expectation { + pub ours: BString, + pub ours_marker: String, + pub theirs: BString, + pub theirs_marker: String, + pub base: BString, + pub base_marker: String, + pub name: BString, + pub expected: BString, + pub options: gix_merge::blob::builtin_driver::text::Options, + } + + pub struct Expectations<'a> { + root: &'a Path, + lines: std::str::Lines<'a>, + } + + impl<'a> Expectations<'a> { + pub fn new(root: &'a Path, cases: &'a str) -> Self { + Expectations { + root, + lines: cases.lines(), + } + } + } + + impl Iterator for Expectations<'_> { + type Item = Expectation; + + fn next(&mut self) -> Option { + let line = self.lines.next()?; + let mut words = line.split(' '); + let (Some(ours), Some(base), Some(theirs), Some(output)) = + (words.next(), words.next(), words.next(), words.next()) + else { + panic!("need at least the input and output") + }; + + let read = |rela_path: &str| read_blob(self.root, rela_path); + + let mut options = gix_merge::blob::builtin_driver::text::Options::default(); + for arg in words { + match arg { + "--diff3" => options.conflict_style = ConflictStyle::Diff3, + "--zdiff3" => options.conflict_style = ConflictStyle::ZealousDiff3, + "--ours" => options.on_conflict = Some(ResolveWith::Ours), + "--theirs" => options.on_conflict = Some(ResolveWith::Theirs), + "--union" => options.on_conflict = Some(ResolveWith::Union), + _ => panic!("Unknown argument to parse into options: '{arg}'"), + 
} + } + + Some(Expectation { + ours: read(ours), + ours_marker: ours.into(), + theirs: read(theirs), + theirs_marker: theirs.into(), + base: read(base), + base_marker: base.into(), + expected: read(output), + name: output.into(), + options, + }) + } + } + + fn read_blob(root: &Path, rela_path: &str) -> BString { + std::fs::read(root.join(rela_path)) + .unwrap_or_else(|_| panic!("Failed to read '{rela_path}' in '{}'", root.display())) + .into() + } + } +} diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs new file mode 100644 index 0000000000..f781f63e48 --- /dev/null +++ b/gix-merge/tests/merge/blob/mod.rs @@ -0,0 +1 @@ +mod builtin_driver; diff --git a/gix-merge/tests/merge/main.rs b/gix-merge/tests/merge/main.rs new file mode 100644 index 0000000000..05375cb227 --- /dev/null +++ b/gix-merge/tests/merge/main.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "blob")] +mod blob; + +pub use gix_testtools::Result;