From ca1a7bffee31dea20b96fbe7252653c4b22721d6 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Fri, 11 Nov 2022 15:48:05 +0100 Subject: [PATCH 1/4] lib: Initial path library design document --- lib/path-design.md | 333 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 lib/path-design.md diff --git a/lib/path-design.md b/lib/path-design.md new file mode 100644 index 0000000000000..0547476272643 --- /dev/null +++ b/lib/path-design.md @@ -0,0 +1,333 @@ +# Path library design + +This document documents why the `lib.path` library is designed the way it is. + +## Goals + +- Work without filesystem access + + We don't know where these paths will be used, eval-time, build-time or runtime. + +- Handle absolute and relative paths + +- Take path or string Nix data types as input + + Nix paths are convenient if you need to refer to project-local files, since they resolve relatively to the Nix file they are declared in. + + However, they always resolve to absolute paths. + We need strings to allow specifying relative paths. + +- Returns string data types + + Since Nix paths don't support relative paths and they mangle ".." + +- Don't allow ambiguous paths + + We don't know how these paths are used in the end. + When symlinks are involved, paths containting `..` may produce unexpected results. + + TODO: Alternatively, something like "Ignoring symlinks, every filesystem location under an anchor (either / or .) has exactly one normalised path pointing to it" + + TODO: Do we really want this though? See the `..` discussion below + +## Implementation notes + +In this library's main docs, discourage users from converting output strings into Nix paths, as this will invoke Nix's broken path handling. 
+ +This library is only the first step towards a full filesystem handling library, consisting of three parts: +- `lib.path`: no filesystem access, works with eval-/build-/run-time paths +- `lib.filesystem`: filesystem access, but doesn't import into the store, only works with eval-time paths +- `lib.sources`: imports eval-time paths into the store + +TODO: Do `builtins` interacting with paths all work with strings? If they get strings, do they work correctly with `..` and symlinks? + +## Use cases +- Source filters and [Source combinators](https://github.com/NixOS/nixpkgs/pull/112083) +- Filesystem paths in NixOS + +## Other implementations and references + +- [Rust](https://doc.rust-lang.org/std/path/struct.Path.html) +- [Python](https://docs.python.org/3/library/pathlib.html) +- [Haskell](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html) +- [Nodejs](https://nodejs.org/api/path.html) +- [POSIX.1-2017](https://pubs.opengroup.org/onlinepubs/9699919799/nframe.html) + +## General design decisions + +Each subsection here contains a decision along with arguments and counter-arguments for (+) and against (-) that decision. + +### Leading dots for relative paths +[leading-dots]: #leading-dots-for-relative-paths + +Context: Relative paths can have a leading `./` to indicate it being a relative path, this is generally not necessary for tools though + +Decision: Returned relative paths should never have a leading `./` + +- :heavy_minus_sign: In shells, just running `foo` as a command wouldn't execute the file `foo`, whereas `./foo` would execute the file. In contrast, `foo/bar` does execute that file without the need for `./`. This can lead to confusion about when a `./` needs to be prefixed. If a `./` is always included, this becomes a non-issue. This effectively then means that paths don't overlap with command names. +- :heavy_minus_sign: Using paths in command line arguments could give problems if not escaped properly, e.g. 
if a path was `--version`. This is not a problem with `./--version`. This effectively then means that paths don't overlap with GNU-style command line options +- :heavy_plus_sign: The POSIX standard doesn't require `./` +- :heavy_plus_sign: It's more pretty without the `./`, good for error messages and co. + - :heavy_minus_sign: But similarly, it could be confusing whether something was even a path + e.g. `foo` could be anything, but `./foo` is more clearly a path +- :heavy_minus_sign: Makes it more uniform with absolute paths (those always start with `/`) + - :heavy_plus_sign: Not relevant though, this perhaps only simplifies the implementation a tiny bit +- :heavy_minus_sign: Makes even single-component relative paths (like `./foo`) valid as a path expression in Nix (`foo` wouldn't be) + - :heavy_plus_sign: Not relevant though, we won't use these paths in Nix expressions +- :heavy_minus_sign: `find` also outputs results with `./` + - :heavy_plus_sign: But only if you give it an argument of `.`. If you give it the argument `some-directory`, it won't prefix that +- :heavy_plus_sign: `realpath --relative-to` doesn't output `./`'s +- :heavy_plus_sign: Leads to `split "/foo/bar" == [ "/" "./foo" "./bar" ]`, which + - Is less performant + - Less pretty + - :heavy_minus_sign: This doesn't really matter though + - Makes each component have a `/` on its own, seeming like the components weren't fully split. Joining the string components together with `/` gives `/./foo/./bar` which is unnecessarily complex + - :heavy_minus_sign: This doesn't really matter though +- :heavy_plus_sign: Leads to wanting `split "foo" == [ "." "./foo" ]` + - Makes `./foo` splittable into `[ "." "./foo" ]` again + - :heavy_minus_sign: Why does that matter? 
+ - :heavy_plus_sign: Makes it less performant + - :heavy_plus_sign: Makes it less performant + +### Two leading slashes +[two-slashes]: #two-leading-slashes + +Context: POSIX [specifies](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271) that exactly two leading slashes (e.g. `//foo/bar`) should be handled specially and that the first component can be resolved in an [implementation-defined way](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13). + +Decision: We don't handle this specially and treat `//foo/bar` the same as `/foo/bar` + +- :heavy_plus_sign: These paths generally [aren't used](https://unix.stackexchange.com/questions/256497/on-what-systems-is-foo-bar-different-from-foo-bar) +- :heavy_plus_sign: Handling these, even just erroring or ignoring them, would complicate the implementation and decrease performance + +### Representation of the current directory +[curdir]: #representation-of-the-current-directory + +Context: The current directory can be represented with `.` or `./` or `./.` + +Decision: It should be `./.` + +- :heavy_plus_sign: `.` would be the only path without a `/` and therefore not a valid Nix path in expressions + - :heavy_minus_sign: We don't require people to type this in expressions +- :heavy_plus_sign: `.` can be interpreted as a shell command (it's a builtin command for zsh) +- :heavy_plus_sign: `./` inconsistent with [the decision to not have trailing slashes](#trailing-slashes) +- :heavy_minus_sign: `./.` is rather long + - :heavy_plus_sign: We don't require users to type this though, it's only used as a library output. + As inputs all three variants are supported + +### `split` being part of the public API +[public-split]: #split-being-part-of-the-public-api + +Context: The main use case for `split` seems to be internal to the library and might not need to be exposed as a public API. 
+The inverse `join` does have lots of use cases though (it appends path components), so it should definitely be part of the public API + +Decision: `split` should be part of the public API + +- :heavy_minus_sign: We don't want to encourage custom path handling, which `split` enables + - :heavy_plus_sign: If there's a need for it, people will do custom handling either way. `split` is a primitive that can make this safer +- :heavy_plus_sign: We might not be able to cover all use cases with our path library + +### Representation +[representation]: #representation + +Context: Paths can be represented directly as a string, or as an attribute set like `{ components = [ "foo" "bar" ]; anchor = "/"; }` + +Decision: Paths are represented as strings + +- :heavy_plus_sign: It's simpler +- :heavy_plus_sign: It's faster + - :heavy_minus_sign: Unless you need to do certain path operations in sequence, e.g. `join [ (join [ "/foo" "bar" ]) "baz" ]` needs the inner `join` to return a string composed of its arguments, only for that string to be decomposed again in the outer `join` + - :heavy_plus_sign: We can mostly avoid such costs by exporting sufficiently powerful functions, so that users don't need to make multiple roundtrips to the library representation +- :heavy_plus_sign: `+` is convenient and doesn't work on attribute sets + - :heavy_minus_sign: It works if we add `__toString` attributes + - :heavy_plus_sign: But then all other attributes get wiped + - :heavy_plus_sign: And we'd then be able to `+` paths again + +### Parents +[parents]: #parents + +Context: Paths can have `..` components, which refer to the parent directory + +Decision: `..` path components are not supported, neither as inputs nor as outputs. + +- :heavy_plus_sign: It requires resolving symlinks to have proper behavior, since e.g. `foo/..` would not be the same as `.` if `foo` is a symlink. 
+ - :heavy_plus_sign: We can't resolve symlinks without filesystem access + - :heavy_plus_sign: Nix also doesn't support reading symlinks at eval-time + - :heavy_minus_sign: What is "proper behavior"? Why can't we just not handle these cases? + - :heavy_plus_sign: E.g. `equals "/foo" "/foo/bar/.."` should those paths be equal? + - :heavy_minus_sign: That can just return `false`, the paths are different, we don't need to check whether the paths point to the same thing + - :heavy_plus_sign: E.g. `relativeTo "/foo" "/bar" == "../foo"`. If this is used like `/bar/../foo` in the end and `bar` is a symlink to somewhere else, this won't be accurate + - :heavy_minus_sign: We could not support such ambiguous operations, or mark them as such, e.g. the normal `relativeTo` will error on such a case, but there could be `extendedRelativeTo` supporting that +- :heavy_minus_sign: `..` are a part of paths, a path library should therefore support it + - :heavy_plus_sign: If we can prove that all such use cases are better done e.g. with runtime tools, the library not supporting it can nudge people towards that + - :heavy_minus_sign: Can we prove that though? +- :heavy_minus_sign: We could allow ".." just in the beginning + - :heavy_plus_sign: Then we'd have to throw an error for doing `join [ "/some/path" "../foo" ]`, making it non-composable + - :heavy_plus_sign: The same is for returning paths with `..`: `relativeTo "/foo" "/bar" => "../foo"` would produce a non-composable path +- :heavy_plus_sign: We argue that `..` is not needed at the Nix evaluation level, since we'd always start evaluation from the project root and don't go up from there + - :heavy_plus_sign: And `..` is supported in Nix paths, turning them into absolute paths + - :heavy_minus_sign: This is ambiguous with symlinks though +- :heavy_plus_sign: If you need `..` for building or runtime, you can use build/run-time tooling to create those (e.g. `realpath` with `--relative-to`), or use absolute paths instead. 
+ This also gives you the ability to correctly handle symlinks + +### Trailing slashes +[trailing-slashes]: #trailing-slashes + +Context: Paths can contain trailing slashes, like `foo/`, indicating that the path points to a directory and not a file + +Decision: All functions remove trailing slashes in their results + +- Comparison to other frameworks to figure out the least surprising behavior: + - :heavy_plus_sign: Nix itself doesn't preserve trailing slashes when parsing and appending its paths + - :heavy_minus_sign: [Rust's std::path](https://doc.rust-lang.org/std/path/index.html) does preserve them during [construction](https://doc.rust-lang.org/std/path/struct.Path.html#method.new) + - :heavy_plus_sign: Doesn't preserve them when returning individual [components](https://doc.rust-lang.org/std/path/struct.Path.html#method.components) + - :heavy_plus_sign: Doesn't preserve them when [canonicalizing](https://doc.rust-lang.org/std/path/struct.Path.html#method.canonicalize) + - :heavy_plus_sign: [Python 3's pathlib](https://docs.python.org/3/library/pathlib.html#module-pathlib) doesn't preserve them during [construction](https://docs.python.org/3/library/pathlib.html#pathlib.PurePath) + - Notably it represents the individual components as a list internally + - :heavy_minus_sign: [Haskell's filepath](https://hackage.haskell.org/package/filepath-1.4.100.0) has [explicit support](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#g:6) for handling trailing slashes + - :heavy_minus_sign: Does preserve them for [normalisation](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#v:normalise) + - :heavy_minus_sign: [NodeJS's Path library](https://nodejs.org/api/path.html) preserves trailing slashes for [normalisation](https://nodejs.org/api/path.html#pathnormalizepath) + - :heavy_plus_sign: For [parsing a path](https://nodejs.org/api/path.html#pathparsepath) into its significant elements, trailing slashes are not 
preserved +- :heavy_plus_sign: Nix's builtin function `dirOf` gives an unexpected result for paths with trailing slashes: `dirOf "/foo/bar/" == "/foo/bar"`. + Inconsistently, `baseNameOf` works correctly though: `baseNameOf "/foo/bar/" == "bar"`. + - :heavy_minus_sign: We are writing a path library to improve handling of paths though, so we shouldn't use these functions and discourage their use +- :heavy_minus_sign: Unexpected result when normalising intermediate paths, like `normalise ("/foo" + "/") + "bar" == "/foobar"` + - :heavy_plus_sign: Does this have a real use case? + - :heavy_plus_sign: Don't use `+` to append paths, this library has a `join` function for that + - :heavy_minus_sign: Users might use `+` instinctively though +- :heavy_plus_sign: The `realpath` command also removes trailing slashes +- :heavy_plus_sign: Even with a trailing slash, the path is the same, it's only an indication that it's a directory +- :heavy_plus_sign: Normalisation should return the same string when we know it's the same path, so removing the slash. + This way we can use the result as an attribute key. + +TODO: +- Add more language comparisons + +## API + +TODO: +- baseNameOf +- dirOf +- isRelativeTo +- commonAncestor +- equals +- extension getter/setter +- List of all ancestors (including self), like + +### `isAbsolute` + +Whether a path is absolute, meaning it starts with a slash. Does not check whether the path is valid. + +Examples: +- `isAbsolute "" == ` +- `isAbsolute "/" == true` +- `isAbsolute "/foo" == true` +- `isAbsolute "." == false` +- `isAbsolute "bar" == false` + +Decisions: +- Also counts exactly two leading `/` as an absolute path, even though it's not [according to POSIX](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_02). + Justification [here](#two-slashes) +- Does not check whether the path is valid because the function doesn't return the path and it would not be efficient. 
+ Only functions operating on the full path should make sure it's valid. + +### `isRelative` + +Whether a path is relative, meaning it doesn't start with a slash. +This is the boolean inverse of `isAbsolute`. + +### `relativeTo` + +Turns an absolute path into a relative path. + +Examples: +- `relativeTo "/foo" "/foo/bar" == "bar"` +- `relativeTo "/baz" "/foo/bar" == ` +- `relativeTo "foo" "foo/bar" == "bar"` +- `relativeTo "foo" "/foo/bar" == ` +- `relativeTo "/" "/foo/bar" == "foo/bar"` + +Use cases: +- For source combinators, a way to set a source to point to a subpath using `setSubpath ./foo/bar ./.`. This needs to calculate the relative path `foo/bar` from the absolute path `./foo/bar` resolved by Nix + +Laws: + +### `split` + +Splits a path into its components. +If the path is absolute, the first resulting component is `/`. +If the path is relative, the first resulting component is `.`. + +Examples: +- `split "/" == ["/"]` +- `split "/foo" == ["/" "foo"]` +- `split "." == [ ]` +- `split "bar" == [ "bar" ]` + +Invariants: +- Inverse of `join`: + `join (split p) == normalise p` +- Components can't be split any further: + `! exists cs . join cs == p && length cs > length (split p)` +- TODO: Law that ensures components are normalized? + +Use cases: +- TODO + +### `join` + +Joins path components together. All but the first component must be relative, though they can contain non-leading slashes. 
+ +Examples: +- `join ["/foo" "bar"] == "/foo/bar"` +- `join ["foo" "bar/baz"] == "foo/bar/baz"` +- `join ["/foo" "/bar"] == ` +- `join ["/foo" (relativeTo "/" "/bar") ] == "/foo/bar"` + +Laws: +- Inverse of `split`: + `join (split p) == normalise p` +- Associativity (TODO: Why do we need this?): + `join [ (join [a b]) c ] == join [ a (join [b c]) ]` +- The result is normalised: + join as == normalise (join as) +- Joining a single path is that path itself, but normalised: + join [ p ] == normalise p + +Use cases: +- TODO + +### `normalise` + +Normalizes the path by: +- Limiting repeating `/` to a single one (does not change a [POSIX Pathname](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271)) +- Removing extraneous `.` components (does not change the result of [POSIX Pathname Resolution](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)) +- Erroring for empty strings (not allowed as a [POSIX Filename](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170)) +- Removing trailing `/` and `/.` (See [justification](#trailing-slashes)) +- Erroring for ".." components (See [justification](#parents)) +- Removing `./` from relative paths (See [justification](#leading-dots)) + +Examples: +- `normalise "foo" == "foo"` +- `normalise "/foo//bar" == "/foo/bar"` +- `normalise "/foo/./bar" == "/foo/bar"` +- `normalise "" == "` +- `normalise "/foo/" == "/foo"` +- `normalise "/foo/." == "/foo"` +- `normalise "/foo/../bar" == ` +- `normalise "//foo" == "/foo"` +- `normalise "///foo" == "/foo"` +- `normalise "//././//foo/.//.///bar/." == "/foo/bar"` + +Laws: +- Same as splitting and joining: + join (split p) == normalise p +- Idempotency: + `normalise (normalise p) == normalise p` +- Behaves like `realpath`: + `isAbsolute p => normalise p == realpath --no-symlinks --canonicalize-missing p` + `isRelative p => normalise p == realpath --no-symlinks --canonicalize-missing --relative-to=. 
p` + +Use cases: +- As an attribute name for a path -> value lookup attribute set + - E.g. `environment.etc.` +- Path equality comparison From 1efb995d39d0bd7c28fb763c735578220fa967fa Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Wed, 30 Nov 2022 19:57:29 +0100 Subject: [PATCH 2/4] Update design document and implement join and normalise --- doc/doc-support/lib-function-docs.nix | 1 + doc/functions/library.xml | 2 + lib/default.nix | 3 +- lib/path-design.md | 261 ++++++++++---------------- lib/path.nix | 138 ++++++++++++++ 5 files changed, 245 insertions(+), 160 deletions(-) create mode 100644 lib/path.nix diff --git a/doc/doc-support/lib-function-docs.nix b/doc/doc-support/lib-function-docs.nix index cbcbed4310af1..de0eaa2a5a0f1 100644 --- a/doc/doc-support/lib-function-docs.nix +++ b/doc/doc-support/lib-function-docs.nix @@ -22,6 +22,7 @@ with pkgs; stdenv.mkDerivation { docgen lists 'List manipulation functions' docgen debug 'Debugging functions' docgen options 'NixOS / nixpkgs option handling' + docgen path 'Path functions' docgen filesystem 'Filesystem functions' docgen sources 'Source filtering functions' ''; diff --git a/doc/functions/library.xml b/doc/functions/library.xml index b291356c14b85..790d3a4aea400 100644 --- a/doc/functions/library.xml +++ b/doc/functions/library.xml @@ -26,6 +26,8 @@ + + diff --git a/lib/default.nix b/lib/default.nix index 8bb06954518b9..c8fe6fd9876ab 100644 --- a/lib/default.nix +++ b/lib/default.nix @@ -27,7 +27,6 @@ let maintainers = import ../maintainers/maintainer-list.nix; teams = callLibs ../maintainers/team-list.nix; meta = callLibs ./meta.nix; - sources = callLibs ./sources.nix; versions = callLibs ./versions.nix; # module system @@ -53,7 +52,9 @@ let fetchers = callLibs ./fetchers.nix; # Eval-time filesystem handling + path = callLibs ./path.nix; filesystem = callLibs ./filesystem.nix; + sources = callLibs ./sources.nix; # back-compat aliases platforms = self.systems.doubles; diff --git a/lib/path-design.md 
b/lib/path-design.md index 0547476272643..9a46a78f72dfc 100644 --- a/lib/path-design.md +++ b/lib/path-design.md @@ -2,56 +2,121 @@ This document documents why the `lib.path` library is designed the way it is. -## Goals +The goal of this library is to support the built-in path value type with extra functionality. +Since the path value type implicitly imports paths from the eval-time system into the store, +this library explicitly doesn't support build-time or runtime paths, including paths of derivations. -- Work without filesystem access +Overall, this library works with two basic forms of paths: +- Absolute paths are represented with the path value type. Nix automatically normalises these paths. +- Relative paths are represented with the string value type. This library normalises these paths as safely as possible. - We don't know where these paths will be used, eval-time, build-time or runtime. +Notably absolute paths in a string value type are not supported, the use of the string value type for relative paths is only because the path value type doesn't support relative paths. -- Handle absolute and relative paths +This library is designed to be as safe and intuitive as possible, throwing errors when potentially unsafe operations are tried, and giving an expected result otherwise. -- Take path or string Nix data types as input +This library is designed to work well as a dependency for the `lib.filesystem` and `lib.sources` library components. Contrary to these library components, `lib.path` is designed to not read any paths from the filesystem. - Nix paths are convenient if you need to refer to project-local files, since they resolve relatively to the Nix file they are declared in. +## Use cases +- Source filters and [Source combinators](https://github.com/NixOS/nixpkgs/pull/112083) +- Filesystem paths in NixOS - However, they always resolve to absolute paths. - We need strings to allow specifying relative paths. 
-- Returns string data types +## API - Since Nix paths don't support relative paths and they mangle ".." +### `join` -- Don't allow ambiguous paths +Joins paths together with `/`. All but the first component must be relative. Returns the same data type as the first element. - We don't know how these paths are used in the end. - When symlinks are involved, paths containting `..` may produce unexpected results. +Examples: +- `join [/foo "bar"] == /foo/bar` +- `join ["foo" "bar/baz"] == "foo/bar/baz"` +- `join [/foo ".." "bar"] == <error>` +- `join [/foo "/bar"] == <error>` +- `join [ "foo" (removePrefix /. /bar) ] == "foo/bar"` - TODO: Alternatively, something like "Ignoring symlinks, every filesystem location under an anchor (either / or .) has exactly one normalised path pointing to it" +Laws: +- The result is normalised: + `join ps == normalise (join ps)` +- Joining a single path is that path itself, but normalised: + `join [ p ] == normalise p` - TODO: Do we really want this though? See the `..` discussion below +Use cases: +- TODO -## Implementation notes +### `normalise` -In this library's main docs, discourage users from converting output strings into Nix paths, as this will invoke Nix's broken path handling. +Normalizes paths. For absolute path values, nothing is done as Nix already normalises those. For relative paths, the following is done: +- Limiting repeating `/` to a single one (does not change a [POSIX Pathname](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271)) +- Removing extraneous `.` components (does not change the result of [POSIX Pathname Resolution](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)) +- Erroring for empty strings (not allowed as a [POSIX Filename](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170)) +- Removing trailing `/` and `/.` (See [justification](#trailing-slashes)) +- Erroring for ".." 
components (See [justification](#parents)) +- Removing leading `./` (See [justification](#leading-dots)) -This library is only the first step towards a full filesystem handling library, consisting of three parts: -- `lib.path`: no filesystem access, works with eval-/build-/run-time paths -- `lib.filesystem`: filesystem access, but doesn't import into the store, only works with eval-time paths -- `lib.sources`: imports eval-time paths into the store +Examples: +- `normalise "foo//bar" == "foo/bar"` +- `normalise "foo/./bar" == "foo/bar"` +- `normalise "" == <error>` +- `normalise "foo/bar/" == "foo/bar"` +- `normalise "foo/bar/." == "foo/bar"` +- `normalise "foo/../bar" == <error>` +- `normalise "./foo/bar" == "foo/bar"` -TODO: Do `builtins` interacting with paths all work with strings? If they get strings, do they work correctly with `..` and symlinks? +Laws: +- Idempotency: + `normalise (normalise p) == normalise p` +- Behaves like `realpath`: + `isAbsolute p => normalise p == realpath --no-symlinks --canonicalize-missing p` + `isRelative p => normalise p == realpath --no-symlinks --canonicalize-missing --relative-to=. p` -## Use cases -- Source filters and [Source combinators](https://github.com/NixOS/nixpkgs/pull/112083) -- Filesystem paths in NixOS +Use cases: +- As an attribute name for a path -> value lookup attribute set + - E.g. `environment.etc.<path>` +- Path equality comparison -## Other implementations and references +### `removePrefix` + +Removes a base directory prefix from a path, returning a relative path. + +Examples: +- `removePrefix /foo /foo/bar == "bar"` +- `removePrefix /baz /foo/bar == <error>` +- `removePrefix "foo" "foo/bar" == "bar"` +- `removePrefix "foo" /foo/bar == <error>` +- `removePrefix /. /foo/bar == "foo/bar"` + +Use cases: +- For source combinators, a way to set a source to point to a subpath using `setSubpath ./foo/bar ./.`. 
This needs to calculate the relative path `foo/bar` from the absolute path `./foo/bar` resolved by Nix + +Laws: + +### `hasPrefix` + +Returns whether a path has a specific base directory prefix. Returns true iff `removePrefix` doesn't error for the same arguments. + +Examples: +- `hasPrefix /foo /foo/bar == true` +- `hasPrefix /baz /foo/bar == false` +- `hasPrefix "foo" "foo/bar" == true` +- `hasPrefix "foo" /foo/bar == false` +- `hasPrefix /. /foo/bar == true` + +Use cases: +- Checking whether `removePrefix` would error before calling it + +Laws: + +### Out of scope (for now at least) + +- isAbsolute and related functions +- baseNameOf +- dirOf +- isRelativeTo +- commonAncestor +- equals +- extension getter/setter +- List of all ancestors (including self), like -- [Rust](https://doc.rust-lang.org/std/path/struct.Path.html) -- [Python](https://docs.python.org/3/library/pathlib.html) -- [Haskell](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html) -- [Nodejs](https://nodejs.org/api/path.html) -- [POSIX.1-2017](https://pubs.opengroup.org/onlinepubs/9699919799/nframe.html) ## General design decisions @@ -202,132 +267,10 @@ Decision: All functions remove trailing slashes in their results TODO: - Add more language comparisons -## API - -TODO: -- baseNameOf -- dirOf -- isRelativeTo -- commonAncestor -- equals -- extension getter/setter -- List of all ancestors (including self), like - -### `isAbsolute` - -Whether a path is absolute, meaning it starts with a slash. Does not check whether the path is valid. - -Examples: -- `isAbsolute "" == ` -- `isAbsolute "/" == true` -- `isAbsolute "/foo" == true` -- `isAbsolute "." == false` -- `isAbsolute "bar" == false` - -Decisions: -- Also counts exactly two leading `/` as an absolute path, even though it's not [according to POSIX](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_02). 
- Justification [here](#two-slashes) -- Does not check whether the path is valid because the function doesn't return the path and it would not be efficient. - Only functions operating on the full path should make sure it's valid. - -### `isRelative` - -Whether a path is relative, meaning it doesn't start with a slash. -This is the boolean inverse of `isAbsolute`. - -### `relativeTo` - -Turns an absolute path into a relative path. - -Examples: -- `relativeTo "/foo" "/foo/bar" == "bar"` -- `relativeTo "/baz" "/foo/bar" == ` -- `relativeTo "foo" "foo/bar" == "bar"` -- `relativeTo "foo" "/foo/bar" == ` -- `relativeTo "/" "/foo/bar" == "foo/bar"` - -Use cases: -- For source combinators, a way to set a source to point to a subpath using `setSubpath ./foo/bar ./.`. This needs to calculate the relative path `foo/bar` from the absolute path `./foo/bar` resolved by Nix - -Laws: - -### `split` - -Splits a path into its components. -If the path is absolute, the first resulting component is `/`. -If the path is relative, the first resulting component is `.`. - -Examples: -- `split "/" == ["/"]` -- `split "/foo" == ["/" "foo"]` -- `split "." == [ ]` -- `split "bar" == [ "bar" ]` - -Invariants: -- Inverse of `join`: - `join (split p) == normalise p` -- Components can't be split any further: - `! exists cs . join cs == p && length cs > length (split p)` -- TODO: Law that ensures components are normalized? - -Use cases: -- TODO - -### `join` - -Joins path components together. All but the first component must be relative, though they can contain non-leading slashes. 
- -Examples: -- `join ["/foo" "bar"] == "/foo/bar"` -- `join ["foo" "bar/baz"] == "foo/bar/baz"` -- `join ["/foo" "/bar"] == ` -- `join ["/foo" (relativeTo "/" "/bar") ] == "/foo/bar"` - -Laws: -- Inverse of `split`: - `join (split p) == normalise p` -- Associativity (TODO: Why do we need this?): - `join [ (join [a b]) c ] == join [ a (join [b c]) ]` -- The result is normalised: - join as == normalise (join as) -- Joining a single path is that path itself, but normalised: - join [ p ] == normalise p - -Use cases: -- TODO - -### `normalise` - -Normalizes the path by: -- Limiting repeating `/` to a single one (does not change a [POSIX Pathname](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271)) -- Removing extraneous `.` components (does not change the result of [POSIX Pathname Resolution](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)) -- Erroring for empty strings (not allowed as a [POSIX Filename](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170)) -- Removing trailing `/` and `/.` (See [justification](#trailing-slashes)) -- Erroring for ".." components (See [justification](#parents)) -- Removing `./` from relative paths (See [justification](#leading-dots)) - -Examples: -- `normalise "foo" == "foo"` -- `normalise "/foo//bar" == "/foo/bar"` -- `normalise "/foo/./bar" == "/foo/bar"` -- `normalise "" == "` -- `normalise "/foo/" == "/foo"` -- `normalise "/foo/." == "/foo"` -- `normalise "/foo/../bar" == ` -- `normalise "//foo" == "/foo"` -- `normalise "///foo" == "/foo"` -- `normalise "//././//foo/.//.///bar/." == "/foo/bar"` - -Laws: -- Same as splitting and joining: - join (split p) == normalise p -- Idempotency: - `normalise (normalise p) == normalise p` -- Behaves like `realpath`: - `isAbsolute p => normalise p == realpath --no-symlinks --canonicalize-missing p` - `isRelative p => normalise p == realpath --no-symlinks --canonicalize-missing --relative-to=. 
p` +## Other implementations and references -Use cases: -- As an attribute name for a path -> value lookup attribute set - - E.g. `environment.etc.` -- Path equality comparison +- [Rust](https://doc.rust-lang.org/std/path/struct.Path.html) +- [Python](https://docs.python.org/3/library/pathlib.html) +- [Haskell](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html) +- [Nodejs](https://nodejs.org/api/path.html) +- [POSIX.1-2017](https://pubs.opengroup.org/onlinepubs/9699919799/nframe.html) diff --git a/lib/path.nix b/lib/path.nix new file mode 100644 index 0000000000000..3bc1132ef8cef --- /dev/null +++ b/lib/path.nix @@ -0,0 +1,138 @@ +# Functions for working with file paths +{ lib }: +let + + inherit (builtins) + isPath + isString + split + substring + ; + + inherit (lib.asserts) + assertMsg + ; + + inherit (lib.lists) + length + head + last + genList + elemAt + concatLists + imap1 + imap0 + tail + ; + + inherit (lib.generators) + toPretty + ; + + inherit (lib.strings) + concatStringsSep + ; + + validRelativeString = value: errorPrefix: + if value == "" then + throw "${errorPrefix}: The string is empty, which is not a valid path" + else if substring 0 1 value == "/" then + throw "${errorPrefix}: The string starts with a `/`, representing an absolute path. Use a path value for absolute paths instead" + else true; + + # Splits a relative path string into its components + # Errors for ".." components, doesn't include "." components + normaliseComponents = path: errorPrefix: + assert assertMsg (isString path) "${errorPrefix}: Not a relative path string"; + assert validRelativeString path "${errorPrefix}: Not a valid relative path string"; + let + # Split the string into its parts using regex for efficiency. This regex + # matches patterns like "/", "/./", "/././", with arbitrarily many "/"s + # together. These are the main special cases: + # - Leading "./" gets split into a leading "." part + # - Trailing "/." 
or "/" get split into a trailing "." or "" + # part respectively + # + # These are the only cases where "." and "" parts can occur + parts = split "/+(\\./+)*" path; + + # `split` creates a list of 2 * k + 1 elements, containing the k + + # 1 parts, interleaved with k matches where k is the number of + # (non-overlapping) matches. This calculation here gets the number of parts + # back from the list length + # floor( (2 * k + 1) / 2 ) + 1 == floor( k + 1/2 ) + 1 == k + 1 + partCount = length parts / 2 + 1; + + # To assemble the final list of components we want to: + # - Skip a potential leading ".", normalising "./foo" to "foo" + # - Skip a potential trailing "." or "", normalising "foo/" and "foo/." to + # "foo" + skipStart = if head parts == "." then 1 else 0; + skipEnd = if last parts == "." || last parts == "" then 1 else 0; + + # We can now know the length of the result by removing the number of + # skipped parts from the total number + componentCount = partCount - skipEnd - skipStart; + + in + # Special case of a single "." path component. Such a case leaves a + # componentCount of -1 due to the skipStart/skipEnd not verifying that + # they don't refer to the same character + if path == "." then [] + + # And we can use this to generate the result list directly. Doing it this + # way over a combination of `filter`, `init` and `tail` makes it more + # efficient, because we don't allocate any intermediate lists + else genList (index: + let + # To get to the element we need to add the number of parts we skip and + # multiply by two due to the interleaved layout of `parts` + value = elemAt parts ((skipStart + index) * 2); + in + + # We don't support ".." components, see ./path-design.md + if value == ".." 
then + throw "${errorPrefix}: Path string contains contains a `..` component, which is not supported" + # Otherwise just return the part unchanged + else + value + ) componentCount; + + joinAbsolute = firstPath: relativePaths: + let + allComponents = concatLists (imap1 (i: el: + normaliseComponents el "lib.path.join: Cannot normalise element ${toPretty { multiline = false; } el} at index ${toString i}" + ) relativePaths); + in + if allComponents == [] then firstPath + else firstPath + ("/" + concatStringsSep "/" allComponents); + + joinRelative = relativePaths: + let + allComponents = concatLists (imap0 (i: el: + normaliseComponents el "lib.path.join: Cannot normalise element ${toPretty { multiline = false; } el} at index ${toString i}" + ) relativePaths); + in + # An empty string is not a valid relative path, so we need to return a `.` when we have no components + if allComponents == [] then "." + else concatStringsSep "/" allComponents; + + +in /* No rec! Add dependencies on this file just above */ { + + join = paths: + assert assertMsg (paths != []) "lib.path.join: No paths provided"; + let firstPath = head paths; in + if isPath firstPath then joinAbsolute firstPath (tail paths) + else if isString firstPath then joinRelative paths + else throw "lib.path.join: First passed element ${toPretty { multiline = false; } firstPath} is neither an absolute path value nor a relative path string"; + + normalise = path: + if isPath path then path + else if isString path then + let components = normaliseComponents path "lib.path.normalise: Cannot normalise value ${toPretty { multiline = false; } path}"; + in if components == [] then "." 
+ else concatStringsSep "/" components + else throw "lib.path.normalise: Passed value ${toPretty { multiline = false; } path} is neither an absolute path value nor a relative path string"; + +} From 49cb1ec3a1641164d37b36d5cd0635ba51c0672b Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Fri, 2 Dec 2022 20:21:00 +0100 Subject: [PATCH 3/4] Clearer types - Changes the agreed-upon design slightly to make types of functions clearer: - Previously `path.join` worked on a list of paths, but required all but the first component to be relative. This is now split into two functions: - `path.append ` takes care of appending a relative path to an absolute path. - `path.relative.join [ ]` takes care of joining relative paths together - `path.normalise` -> `path.relative.normalise`, because we don't need normalisation on absolute paths, Nix already takes care of that, and we use the `path.relative` namespace for anything only relating to relative paths - Some more bikeshedding for the `relativeTo` name. I think `relativeTo` is pretty good, but @fricklerhandwerk likes other suggestions more - Adds some suggestions for partial ordering checks on paths - Adds a `difference` function, which can take care of common prefix and subpath calculations between any number of paths. --- lib/path-design.md | 351 +++++++++++++++++++++++++++++---------------- 1 file changed, 224 insertions(+), 127 deletions(-) diff --git a/lib/path-design.md b/lib/path-design.md index 9a46a78f72dfc..25e9ba71285fc 100644 --- a/lib/path-design.md +++ b/lib/path-design.md @@ -2,109 +2,239 @@ This document documents why the `lib.path` library is designed the way it is. -The goal of this library is to support the built-in path value type with extra functionality. -Since the path value type implicitly imports paths from the eval-time system into the store, -this library explicitly doesn't support build-time or runtime paths, including paths of derivations. +The purpose of this library is to process paths.
It does not read files from the filesystem. +It exists to support the native Nix path value type with extra functionality. + +Since the path value type implicitly imports paths from the "eval-time system" into the store, +this library explicitly doesn't support build-time or run-time paths, including paths to derivations. Overall, this library works with two basic forms of paths: -- Absolute paths are represented with the path value type. Nix automatically normalises these paths. +- Absolute paths are represented with the Nix path value type. Nix automatically normalises these paths. - Relative paths are represented with the string value type. This library normalises these paths as safely as possible. Notably absolute paths in a string value type are not supported, the use of the string value type for relative paths is only because the path value type doesn't support relative paths. -This library is designed to be as safe and intuitive as possible, throwing errors when potentially unsafe operations are tried, and giving an expected result otherwise. +This library is designed to be as safe and intuitive as possible, throwing errors when operations are attempted that would produce surprising results, and giving the expected result otherwise. This library is designed to work well as a dependency for the `lib.filesystem` and `lib.sources` library components. Contrary to these library components, `lib.path` is designed to not read any paths from the filesystem. -## Use cases -- Source filters and [Source combinators](https://github.com/NixOS/nixpkgs/pull/112083) -- Filesystem paths in NixOS +## API + +### `append` +```haskell +append :: Path -> String -> Path +``` -## API +Append a relative path to an absolute path. + +Like ` + ("/" + )` but safer. + +Examples: +```nix +append /foo "bar" == /foo/bar + +# can append to root directory +append /. 
"foo" == /foo + +# normalise the path +append /foo "bar//./baz" == /foo/bar/baz + +# remove trailing slashes +append /foo "bar/" == /foo/bar + +# do not handle parent directory, as it may break underlying symlinks +append /foo "foo/../bar" == + +# prevent appending empty strings by accident +append /foo "" == + +# prevent appending absolute paths by accident +append /foo "/bar" == +``` + +### `relative.join` + +```haskell +relative.join :: [ String ] -> String +``` -### `join` +Join relative paths using `/`. -Joins paths together with `/`. All but the first component must be relative. Returns the same data type as the first element. +Like `concatStringsSep "/"` but safer. Examples: -- `join [/foo "bar"] == /foo/bar` -- `join ["foo" "bar/baz"] == "foo/bar/baz"` -- `join [/foo ".." "bar"] == ` -- `join [/foo "/bar"] == ` -- `join [ "foo" (removePrefix /. /bar) ] == "foo/bar"` +```nix +relative.join ["foo" "bar"] == "foo/bar" +relative.join ["foo" "bar/baz" ] == "foo/bar/baz" +relative.join [ "." ] == "." +relative.join [ "." "foo" ] == "foo" + +# normalise the path +relative.join ["./foo" "bar//./baz/" "./qux" ] == "foo/bar/baz/qux" + +# empty list is the current directory +relative.join [] == "." + +# do not handle parent directory, as it may break underlying symlinks +relative.join ["foo" ".."] == + +# do not handle absolute paths elements +relative.join ["/foo" "bar"] == +relative.join ["foo" "/bar"] == +relative.join ["foo" "/" ] == +``` Laws: +- Associativity: + `relative.join [ x (join [ y z ]) ] == relative.join [ (join [ x y ]) z ]` +- Identity: + `relative.join [] == "."` + `relative.join [p "."] == normalise p` + `relative.join ["." 
p] == normalise p` - The result is normalised: - `join ps == normalise (join ps)` + `relative.join ps == normalise (relative.join ps)` - Joining a single path is that path itself, but normalised: - join [ p ] == normalise p + `relative.join [ p ] == normalise p` -Use cases: -- TODO +### `relative.normalise` -### `normalise` +```haskell +relative.normalise :: String -> String +``` -Normalizes paths. For absolute path values, nothing is done as Nix already normalises those. For relative path the following is done: -- Limiting repeating `/` to a single one (does not change a [POSIX Pathname](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271)) -- Removing extraneous `.` components (does not change the result of [POSIX Pathname Resolution](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)) -- Erroring for empty strings (not allowed as a [POSIX Filename](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170)) -- Removing trailing `/` and `/.` (See [justification](#trailing-slashes)) -- Erroring for ".." components (See [justification](#parents)) -- Removing leading `./` (See [justification](#leading-dots)) +Normalise relative paths. 
+ +- Limit repeating `/` to a single one (does not change a [POSIX Pathname](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271)) +- Remove redundant `.` components (does not change the result of [POSIX Pathname Resolution](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13)) +- Error on empty strings (not allowed as a [POSIX Filename](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_170)) +- Remove trailing `/` and `/.` (See [justification](#trailing-slashes)) +- Error on `..` path components (See [justification](#parents)) +- Remove leading `./` (See [justification](#leading-dots)) Examples: -- `normalise "foo//bar" == "foo/bar"` -- `normalise "foo/./bar" == "foo/bar"` -- `normalise "" == ` -- `normalise "foo/bar/" == "foo/bar"` -- `normalise "foo/bar/." == "foo/bar"` -- `normalise "foo/../bar" == ` -- `normalise "./foo/bar" == "foo/bar"` +``` +# limit repeating `/` to a single one +relative.normalise "foo//bar" == "foo/bar" + +# remove redundant `.` components +relative.normalise "foo/./bar" == "foo/bar" + +# remove leading `./` +# TODO: bikeshedding. there is a use to this. +relative.normalise "./foo/bar" == "foo/bar" + +# remove trailing `/` +relative.normalise "foo/bar/" == "foo/bar" + +# remove trailing `/.` +relative.normalise "foo/bar/." == "foo/bar" + +# error on `..` path components +relative.normalise "foo/../bar" == + +# error on empty string +relative.normalise "" == + +# error on absolute path +relative.normalise "/foo" == +``` Laws: - Idempotency: - `normalise (normalise p) == normalise p` -- Behaves like `realpath`: - `isAbsolute p => normalise p == realpath --no-symlinks --canonicalize-missing p` - `isRelative p => normalise p == realpath --no-symlinks --canonicalize-missing --relative-to=. p` -Use cases: -- As an attribute name for a path -> value lookup attribute set - - E.g. 
`environment.etc.` -- Path equality comparison + `relative.normalise (relative.normalise p) == relative.normalise p` -### `removePrefix` +- Doesn't change the file system object pointed to: -Removes a base directory prefix from a path, returning a relative path. + `$(stat ${p}) == $(stat ${relative.normalise p})` -Examples: -- `removePrefix /foo /foo/bar == "bar"` -- `removePrefix /baz /foo/bar == ` -- `removePrefix "foo" "foo/bar" == "bar"` -- `removePrefix "foo" /foo/bar == ` -- `removePrefix /. /foo/bar == "foo/bar"` + TODO: Is this the same as the law below? -Use cases: -- For source combinators, a way to set a source to point to a subpath using `setSubpath ./foo/bar ./.`. This needs to calculate the relative path `foo/bar` from the absolute path `./foo/bar` resolved by Nix +- Uniqueness: If the normalisation of two paths is different then they point to different paths: -Laws: + `normalise p != normalise q => $(stat ${p}) != $(stat ${q})` -### `hasPrefix` + Note: This law only holds if trailing slashes are not persisted, see the [trailing slashes decision](#trailing-slashes) -Returns whether a path has a specific base directory prefix. Returns true iff `removePrefix` doesn't error for the same arguments. +### `difference` + +```haskell +difference :: AttrsOf Path -> { commonPrefix :: Path; suffix :: AttrsOf String; } +``` + +Take the difference between multiple paths, returning the common prefix between them and the respective suffixes. Examples: -- `hasPrefix /foo /foo/bar == true` -- `hasPrefix /baz /foo/bar == false` -- `hasPrefix "foo" "foo/bar" == true` -- `hasPrefix "foo" /foo/bar == false` -- `hasPrefix /.
/foo/bar == true` +```nix +difference { path = /foo/bar; } = { commonPrefix = /foo/bar; suffix = { path = "."; }; } +difference { left = /foo/bar; right = /foo/baz; } = { commonPrefix = /foo; suffix = { left = "bar"; right = "baz"; }; } +difference { left = /foo; right = /foo/bar; } = { commonPrefix = /foo; suffix = { left = "."; right = "bar"; }; } +difference { left = /.; right = /foo; } = { commonPrefix = /.; suffix = { left = "."; right = "foo"; }; } +difference { left = /foo; right = /foo; } = { commonPrefix = /foo; suffix = { left = "."; right = "."; }; } + +# Requires at least one path +difference {} = +``` + +### `relativeTo` + +```haskell +relativeTo :: Path -> Path -> String +``` + +Returns the relative path to go from a base absolute path to a specific descendant. + +TODO Not sure about the name, ideas are: +- `relativeTo`: We might want to use `relativeTo` for a function that can return `..`, but that's not a problem because this function can just throw an error until (if ever) that's supported +- `removePrefix`, `stripPrefix`: Might accidentally use the string variants which have non-desired behavior on paths, we could fix those though, and we are qualified under `lib.path.*` anyways +- `subpathBetween`, `subpathFrom`, `subpathFromTo`: Introduces the "subpath" concept when it's not mentioned anywhere else +- `descendantSubpath`: Would be nice to align this somehow with comparison functions +- Don't have this function, can use `difference` instead +- Prefix this function name with `_` to signify it's not final and might change in the future? -Use cases: -- Checking whether `removePrefix` would error before calling it +Examples: +``` +relativeTo /foo /foo/bar == "bar" +relativeTo /.
/foo/bar == "foo/bar" +relativeTo /baz /foo/bar == +``` Laws: +- `relativeTo p p == "."` +- `relativeTo p (append p q) == relative.normalise q` + +### Partial ordering query functions + +Paths with their ancestor-descendant relationship are a [partially ordered set](https://en.wikipedia.org/wiki/Partially_ordered_set) (proof left as an exercise to the reader). We should have some basic functions for querying that relationship. This is at least necessary to check whether `relativeTo` errors or not before calling it. + +``` +isDescendantOf /foo /foo/bar == true +isDescendantOf /foo /foo == ?? +isAncestorOf /foo/bar /foo == true +isAncestorOf /foo /foo == ?? + +isProperDescendantOf /foo /foo/bar == true +isProperDescendantOf /foo /foo == ?? +isProperAncestorOf /foo/bar /foo == true +isProperAncestorOf /foo /foo == ?? + +isDescendantOrEqual /foo /foo/bar == true +isDescendantOrEqual /foo /foo == true +isAncestorOrEqual /foo/bar /foo == true +isAncestorOrEqual /foo /foo == true + +containedIn /foo /foo/bar == true +containedIn /foo /foo == true +contains /foo/bar /foo == true +contains /foo /foo == true + +partOf /foo /foo/bar == true + +isEquals /foo /foo == true +equals /foo /foo == true + +``` ### Out of scope (for now at least) @@ -117,7 +247,6 @@ Laws: - extension getter/setter - List of all ancestors (including self), like - ## General design decisions Each subsection here contains a decision along with arguments and counter-arguments for (+) and against (-) that decision. @@ -129,7 +258,11 @@ Context: Relative paths can have a leading `./` to indicate it being a relative Decision: Returned relative paths should never have a leading `./` +TODO: Inconsistent with the decision [to use `./.` for the current directory][curdir]. + - :heavy_minus_sign: In shells, just running `foo` as a command wouldn't execute the file `foo`, whereas `./foo` would execute the file. In contrast, `foo/bar` does execute that file without the need for `./`.
This can lead to confusion about when a `./` needs to be prefixed. If a `./` is always included, this becomes a non-issue. This effectively then means that paths don't overlap with command names. +- :heavy_minus_sign: Nix path expressions need at least a single `/` to trigger, so adding a `./` would make that work + - :heavy_minus_sign: Though there's no good reason why anybody would want to put the output of path expressions directly back into Nix, and if it doesn't work they'd immediately get a parse error anyways - :heavy_minus_sign: Using paths in command line arguments could give problems if not escaped properly, e.g. if a path was `--version`. This is not a problem with `./--version`. This effectively then means that paths don't overlap with GNU-style command line options - :heavy_plus_sign: The POSIX standard doesn't require `./` - :heavy_plus_sign: It's more pretty without the `./`, good for error messages and co. @@ -142,27 +275,6 @@ Decision: Returned relative paths should never have a leading `./` - :heavy_minus_sign: `find` also outputs results with `./` - :heavy_plus_sign: But only if you give it an argument of `.`. If you give it the argument `some-directory`, it won't prefix that - :heavy_plus_sign: `realpath --relative-to` doesn't output `./`'s -- :heavy_plus_sign: Leads to `split "/foo/bar" == [ "/" "./foo" "./bar" ]`, which - - Is less performant - - Less pretty - - :heavy_minus_sign: This doesn't really matter though - - Makes each component have a `/` on its own, seeming like the components weren't fully split. Joining the string components together with `/` gives `/./foo/./bar` which is unnecessarily complex - - :heavy_minus_sign: This doesn't really matter though -- :heavy_plus_sign: Leads to wanting `split "foo" == [ "." "./foo" ]` - - Makes `./foo` splittable into `[ "." "./foo" ]` again - - :heavy_minus_sign: Why does that matter? 
- - :heavy_plus_sign: Makes it less performant - - :heavy_plus_sign: Makes it less performant - -### Two leading slashes -[two-slashes]: #two-leading-slashes - -Context: POSIX [specifies](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271) that exactly two leading slashes (e.g. `//foo/bar`) should be handled specially and that the first component can be resolved in an [implementation-defined way](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13). - -Decision: We don't handle this specially and treat `//foo/bar` the same as `/foo/bar` - -- :heavy_plus_sign: These paths generally [aren't used](https://unix.stackexchange.com/questions/256497/on-what-systems-is-foo-bar-different-from-foo-bar) -- :heavy_plus_sign: Handling these, even just erroring or ignoring them, would complicate the implementation and decrease performance ### Representation of the current directory [curdir]: #representation-of-the-current-directory @@ -171,63 +283,50 @@ Context: The current directory can be represented with `.` or `./` or `./.` Decision: It should be `./.` +- :heavy_plus_sign: `./` would be inconsistent with [the decision to not have trailing slashes](#trailing-slashes) +- :heavy_minus_sign: `.` is how `realpath` normalises paths +- :heavy_plus_sign: `.` can be interpreted as a shell command (it's a builtin for sourcing files in bash and zsh) - :heavy_plus_sign: `.` would be the only path without a `/` and therefore not a valid Nix path in expressions - - :heavy_minus_sign: We don't require people to type this in expressions -- :heavy_plus_sign: `.` can be interpreted as a shell command (it's a builtin command for zsh) -- :heavy_plus_sign: `./` inconsistent with [the decision to not have trailing slashes](#trailing-slashes) - :heavy_minus_sign: `./.` is rather long - - :heavy_plus_sign: We don't require users to type this though, it's only used as a library output. 
- As inputs all three variants are supported - -### `split` being part of the public API -[public-split]: #split-being-part-of-the-public-api - -Context: The main use case for `split` seems to be internal to the library and might not need to be exposed as a public API. -The inverse `join` does have lots of use cases though (it appends path components), so it should definitely be part of the public API + - :heavy_minus_sign: We don't require users to type this though, it's mainly just used as a library output. + As inputs all three variants are supported for relative paths (and we can't do anything about absolute paths) +- :heavy_minus_sign: `builtins.dirOf "foo" == "."`, so `.` would be consistent with that -Decision: `split` should be part of the public API +### Relative path representation +[relrepr]: #relative-path-representation -- :heavy_minus_sign: We don't want to encourage custom path handling, which `split` enables - - :heavy_plus_sign: If there's a need for it, people will do custom handling either way. `split` is a primitive that can make this safer -- :heavy_plus_sign: We might not be able to cover all use cases with our path library - -### Representation -[representation]: #representation - -Context: Paths can be represented directly as a string, or as an attribute set like `{ components = [ "foo" "bar" ]; anchor = "/"; }` +Context: Relative paths can be represented as a string, a list with all the components like `[ "foo" "bar" ]` for `foo/bar`, or with an attribute set like `{ type = "relative-path"; components = [ "foo" "bar" ]; }` Decision: Paths are represented as strings -- :heavy_plus_sign: It's simpler -- :heavy_plus_sign: It's faster - - :heavy_minus_sign: Unless you need to do certain path operations in sequence, e.g. 
`join [ (join [ "/foo" "bar" ]) "baz" ]` needs the inner `join` to return a string composed of its arguments, only for that string to be decomposed again in the outer `join` - - :heavy_plus_sign: We can mostly avoid such costs by exporting sufficiently powerful functions, so that users don't need to make multiple roundtrips to the library representation -- :heavy_plus_sign: `+` is convenient and doesn't work on attribute sets - - :heavy_minus_sign: It works if we add `__toString` attributes - - :heavy_plus_sign: But then all other attributes get wiped - - :heavy_plus_sign: And we'd then be able to `+` paths again +- :heavy_plus_sign: It's simpler for the end user, as one doesn't need to make sure the path is in a string representation before it can be used + - :heavy_plus_sign: Also `concatStringsSep "/"` might be used to turn a relative list path value into a string, which then breaks for `[]` +- :heavy_plus_sign: It doesn't encourage people to do their own path processing and instead use the library + E.g. With lists it would be very easy to just use `lib.lists.init` to get the parent directory, but then it breaks for `.`, represented as `[ ]` +- :heavy_plus_sign: `+` is convenient and doesn't work on lists and attribute sets + - :heavy_minus_sign: Shouldn't use `+` anyways, we export safer functions for path manipulation ### Parents [parents]: #parents -Context: Paths can have `..` components, which refer to the parent directory +Context: Relative paths can have `..` components, which refer to the parent directory -Decision: `..` path components are not supported, nor as inputs nor as outputs. +Decision: `..` path components in relative paths are not supported, nor as inputs nor as outputs. - :heavy_plus_sign: It requires resolving symlinks to have proper behavior, since e.g. `foo/..` would not be the same as `.` if `foo` is a symlink. 
- :heavy_plus_sign: We can't resolve symlinks without filesystem access - :heavy_plus_sign: Nix also doesn't support reading symlinks at eval-time - :heavy_minus_sign: What is "proper behavior"? Why can't we just not handle these cases? - - :heavy_plus_sign: E.g. `equals "/foo" "/foo/bar/.."` should those paths be equal? + - :heavy_plus_sign: E.g. `equals "foo" "foo/bar/.."` should those paths be equal? - :heavy_minus_sign: That can just return `false`, the paths are different, we don't need to check whether the paths point to the same thing - - :heavy_plus_sign: E.g. `relativeTo "/foo" "/bar" == "../foo"`. If this is used like `/bar/../foo` in the end and `bar` is a symlink to somewhere else, this won't be accurate + - :heavy_plus_sign: E.g. `relativeTo /foo /bar == "../foo"`. If this is used like `/bar/../foo` in the end and `bar` is a symlink to somewhere else, this won't be accurate - :heavy_minus_sign: We could not support such ambiguous operations, or mark them as such, e.g. the normal `relativeTo` will error on such a case, but there could be `extendedRelativeTo` supporting that - :heavy_minus_sign: `..` are a part of paths, a path library should therefore support it - :heavy_plus_sign: If we can prove that all such use cases are better done e.g. with runtime tools, the library not supporting it can nudge people towards that - :heavy_minus_sign: Can we prove that though? - :heavy_minus_sign: We could allow ".." 
just in the beginning - - :heavy_plus_sign: Then we'd have to throw an error for doing `join [ "/some/path" "../foo" ]`, making it non-composable - - :heavy_plus_sign: The same is for returning paths with `..`: `relativeTo "/foo" "/bar" => "../foo"` would produce a non-composable path + - :heavy_plus_sign: Then we'd have to throw an error for doing `append /some/path "../foo"`, making it non-composable + - :heavy_plus_sign: The same is for returning paths with `..`: `relativeTo /foo /bar => "../foo"` would produce a non-composable path - :heavy_plus_sign: We argue that `..` is not needed at the Nix evaluation level, since we'd always start evaluation from the project root and don't go up from there - :heavy_plus_sign: And `..` is supported in Nix paths, turning them into absolute paths - :heavy_minus_sign: This is ambiguous with symlinks though @@ -237,10 +336,11 @@ Decision: `..` path components are not supported, nor as inputs nor as outputs. ### Trailing slashes [trailing-slashes]: #trailing-slashes -Context: Paths can contain trailing slashes, like `foo/`, indicating that the path points to a directory and not a file +Context: Relative paths can contain trailing slashes, like `foo/`, indicating that the path points to a directory and not a file Decision: All functions remove trailing slashes in their results +- :heavy_plus_sign: It enables the uniqueness law that if `normalise p != normalise q` then `$(stat p) != $(stat q)`.
- Comparison to other frameworks to figure out the least surprising behavior: - :heavy_plus_sign: Nix itself doesn't preserve trailing newlines when parsing and appending its paths - :heavy_minus_sign: [Rust's std::path](https://doc.rust-lang.org/std/path/index.html) does preserve them during [construction](https://doc.rust-lang.org/std/path/struct.Path.html#method.new) @@ -252,21 +352,18 @@ Decision: All functions remove trailing slashes in their results - :heavy_minus_sign: Does preserve them for [normalisation](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#v:normalise) - :heavy_minus_sign: [NodeJS's Path library](https://nodejs.org/api/path.html) preserves trailing slashes for [normalisation](https://nodejs.org/api/path.html#pathnormalizepath) - :heavy_plus_sign: For [parsing a path](https://nodejs.org/api/path.html#pathparsepath) into its significant elements, trailing slashes are not preserved -- :heavy_plus_sign: Nix's builtin function `dirOf` gives an unexpected result for paths with trailing slashes: `dirOf "/foo/bar/" == "/foo/bar"`. - Inconsistently, `baseNameOf` works correctly though: `baseNameOf "/foo/bar/" == "bar"`. +- :heavy_plus_sign: Nix's builtin function `dirOf` gives an unexpected result for paths with trailing slashes: `dirOf "foo/bar/" == "foo/bar"`. + Inconsistently, `baseNameOf` works correctly though: `baseNameOf "foo/bar/" == "bar"`. - :heavy_minus_sign: We are writing a path library to improve handling of paths though, so we shouldn't use these functions and discourage their use -- :heavy_minus_sign: Unexpected result when normalising intermediate paths, like `normalise ("/foo" + "/") + "bar" == "/foobar"` +- :heavy_minus_sign: Unexpected result when normalising intermediate paths, like `normalise ("foo" + "/") + "bar" == "foobar"` - :heavy_plus_sign: Does this have a real use case? 
- :heavy_plus_sign: Don't use `+` to append paths, this library has a `join` function for that - - :heavy_minus_sign: Users might use `+` instinctively though + - :heavy_minus_sign: Users might use `+` out of habit though - :heavy_plus_sign: The `realpath` command also removes trailing slashes - :heavy_plus_sign: Even with a trailing slash, the path is the same, it's only an indication that it's a directory - :heavy_plus_sign: Normalisation should return the same string when we know it's the same path, so removing the slash. This way we can use the result as an attribute key. -TODO: -- Add more language comparisons - ## Other implementations and references - [Rust](https://doc.rust-lang.org/std/path/struct.Path.html) From 2981d879c2b91d5556c2cc6c980bdc086823504f Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Tue, 6 Dec 2022 22:36:44 +0100 Subject: [PATCH 4/4] Update code to match the design document Also lays down the assumptions we're making about paths, assumptions which notably also make the library work with the lazy trees Nix PR (without relying or interfering with any of its bugs) --- lib/path-design.md | 11 ++++ lib/path.nix | 135 +++++++++++++++++++++++++++++++++------------ 2 files changed, 110 insertions(+), 36 deletions(-) diff --git a/lib/path-design.md b/lib/path-design.md index 25e9ba71285fc..740adb95a3b8e 100644 --- a/lib/path-design.md +++ b/lib/path-design.md @@ -18,6 +18,17 @@ This library is designed to be as safe and intuitive as possible, throwing error This library is designed to work well as a dependency for the `lib.filesystem` and `lib.sources` library components. Contrary to these library components, `lib.path` is designed to not read any paths from the filesystem. 
+This library makes only these assumptions about paths and no others: +- `dirOf path` returns the path to the parent directory of `path`, unless `path` is the filesystem root, in which case `path` is returned + - There can be multiple filesystem roots: `p == dirOf p` and `q == dirOf q` does not imply `p == q` + - While there's only a single filesystem root in stable Nix, the [lazy trees PR](https://github.com/NixOS/nix/pull/6530) introduces [additional filesystem roots](https://github.com/NixOS/nix/pull/6530#discussion_r1041442173) +- `path + ("/" + string)` returns the path to the `string` subdirectory in `path` + - If `string` contains no `/` characters, then `dirOf (path + ("/" + string)) == path` + - If `string` contains no `/` characters, then `baseNameOf (path + ("/" + string)) == string` +- `path1 == path2` returns true only if `path1` points to the same filesystem path as `path2` + +Notably we do not make the assumption that we can turn paths into strings using `toString path`. + ## API ### `append` diff --git a/lib/path.nix b/lib/path.nix index 3bc1132ef8cef..363ae889052cc 100644 --- a/lib/path.nix +++ b/lib/path.nix @@ -13,6 +13,10 @@ let assertMsg ; + inherit (lib.path) + commonAncestry + ; + inherit (lib.lists) length head @@ -33,18 +37,24 @@ let concatStringsSep ; + inherit (lib.attrsets) + mapAttrsToList + ; + + pretty = toPretty { multiline = false; }; + validRelativeString = value: errorPrefix: if value == "" then - throw "${errorPrefix}: The string is empty, which is not a valid path" + throw "${errorPrefix}: The string is empty" else if substring 0 1 value == "/" then - throw "${errorPrefix}: The string starts with a `/`, representing an absolute path. Use a path value for absolute paths instead" + throw "${errorPrefix}: The string is an absolute path because it starts with `/`" else true; - # Splits a relative path string into its components + # Splits and normalises a relative path string into its components # Errors for ".."
components, doesn't include "." components - normaliseComponents = path: errorPrefix: - assert assertMsg (isString path) "${errorPrefix}: Not a relative path string"; - assert validRelativeString path "${errorPrefix}: Not a valid relative path string"; + splitRelative = path: errorPrefix: + #assert assertMsg (isString path) "${errorPrefix}: Not a relative path string"; + #assert validRelativeString path "${errorPrefix}: Not a valid relative path string"; let # Split the string into its parts using regex for efficiency. This regex # matches patterns like "/", "/./", "/././", with arbitrarily many "/"s @@ -98,41 +108,94 @@ let value ) componentCount; - joinAbsolute = firstPath: relativePaths: - let - allComponents = concatLists (imap1 (i: el: - normaliseComponents el "lib.path.join: Cannot normalise element ${toPretty { multiline = false; } el} at index ${toString i}" - ) relativePaths); - in - if allComponents == [] then firstPath - else firstPath + ("/" + concatStringsSep "/" allComponents); - joinRelative = relativePaths: + + joinRelative = components: + # An empty string is not a valid relative path, so we need to return a `.` when we have no components + if components == [] then "." + else concatStringsSep "/" components; + + isRoot = path: path == dirOf path; + + deconstructPath = path: let - allComponents = concatLists (imap0 (i: el: - normaliseComponents el "lib.path.join: Cannot normalise element ${toPretty { multiline = false; } el} at index ${toString i}" - ) relativePaths); - in - # An empty string is not a valid relative path, so we need to return a `.` when we have no components - if allComponents == [] then "." - else concatStringsSep "/" allComponents; + go = components: path: + if isRoot path then { root = path; inherit components; } + else go ([ (baseNameOf path) ] ++ components) (dirOf path); + in go [] path; in /* No rec! 
Add dependencies on this file just above */ {
 
-  join = paths:
-    assert assertMsg (paths != []) "lib.path.join: No paths provided";
-    let firstPath = head paths; in
-    if isPath firstPath then joinAbsolute firstPath (tail paths)
-    else if isString firstPath then joinRelative paths
-    else throw "lib.path.join: First passed element ${toPretty { multiline = false; } firstPath} is neither an absolute path value nor a relative path string";
-
-  normalise = path:
-    if isPath path then path
-    else if isString path then
-      let components = normaliseComponents path "lib.path.normalise: Cannot normalise value ${toPretty { multiline = false; } path}";
-      in if components == [] then "."
-      else concatStringsSep "/" components
-    else throw "lib.path.normalise: Passed value ${toPretty { multiline = false; } path} is neither an absolute path value nor a relative path string";
+  # Appends the relative path string `subpath` to the path value `basePath`
+  # and returns the resulting path value.
+  # `subpath` is validated and normalised first: it is an error if it is not a
+  # string, is empty, starts with `/`, or can't be normalised by `splitRelative`
+  # (e.g. because of ".." components)
+  append = basePath: subpath:
+    assert assertMsg (isPath basePath) "lib.path.append: First argument ${pretty basePath} is not a path value";
+    assert assertMsg (isString subpath) "lib.path.append: Second argument ${pretty subpath} is not a string";
+    assert validRelativeString subpath "lib.path.append: Second argument ${subpath} is not a valid relative path string";
+    let components = splitRelative subpath "lib.path.append: Second argument ${subpath} can't be normalised";
+    in basePath + ("/" + joinRelative components);
+
+
+  # Joins a list of relative path strings into one normalised relative path
+  # string. Every element is validated like the second argument of `append`;
+  # error messages mention the 0-based index of the offending element.
+  # Returns "." when no components remain, e.g. for an empty list
+  relative.join = paths:
+    let
+      allComponents = concatLists (imap0 (i: subpath:
+        # `pretty` rather than `toString`: in this message the element is known
+        # not to be a string, and `toString` itself fails on e.g. attribute sets
+        assert assertMsg (isString subpath) "lib.path.relative.join: Element ${pretty subpath} at index ${toString i} is not a string";
+        assert validRelativeString subpath "lib.path.relative.join: Element ${toString subpath} at index ${toString i} is not a valid relative path string";
+        splitRelative subpath "lib.path.relative.join: Element ${toString subpath} at index ${toString i} can't be normalised"
+      ) paths);
+    in joinRelative allComponents;
+
+  # Normalises a single relative path string by splitting it into components
+  # with `splitRelative` and joining them again with `/`.
+  # Returns "." when no components remain
+  relative.normalise = path:
+    # `pretty` rather than `toString`: in this message the argument is known
+    # not to be a string, and `toString` itself fails on e.g. attribute sets
+    assert assertMsg (isString path) "lib.path.relative.normalise: Argument ${pretty path} is not a string";
+    assert validRelativeString path "lib.path.relative.normalise: Argument ${toString path} is not a valid relative path string";
+    let components = splitRelative path "lib.path.relative.normalise: Argument ${toString path} can't be normalised";
+    in joinRelative components;
+
+  # Takes a non-empty attribute set of path values and returns
+  #   { commonPrefix; relativePaths; }
+  # where `commonPrefix` is the path made up of the filesystem root plus the
+  # longest run of leading components shared by all given paths, and
+  # `relativePaths` maps every attribute name to the relative path string of
+  # the remaining components of that path ("." if none remain).
+  # Throws if the paths don't all have the same filesystem root
+  commonAncestry = paths:
+    let
+      deconstructed = lib.attrValues (lib.mapAttrs (name: value:
+        assert assertMsg (isPath value) "lib.path.commonAncestry: Attribute ${name} = ${pretty value} is not a path data type";
+        deconstructPath value // { inherit name value; }
+      ) paths);
+      pathHead = head deconstructed;
+      pathTail = tail deconstructed;
+
+      # Number of leading components that all paths have in common
+      go = level:
+        if lib.all (x: length x.components > level) deconstructed
+          && lib.all (x: elemAt x.components level == elemAt pathHead.components level) pathTail
+        then go (level + 1)
+        else level;
+
+      root =
+        # Fast happy path in case all roots are the same
+        if lib.all (x: x.root == pathHead.root) pathTail then pathHead.root
+        # Slow sad path when that's not the case and we need to throw an error
+        else lib.foldl' (result: el:
+          if pathHead.root == el.root then result
+          else throw "lib.path.commonAncestry: Path ${pathHead.name} = ${toString pathHead.value} (root ${toString pathHead.root}) has a different filesystem root than path ${toString el.name} = ${toString el.value} (root ${toString el.root})"
+        ) null pathTail;
+
+      level =
+        # Ensure that we have a common root before trying to find a common ancestor
+        # If we didn't do this one could evaluate `relativePaths` without an error even when there's no common root
+        builtins.seq root
+        (go 0);
+
+      prefix = joinRelative (lib.sublist 0 level pathHead.components);
+      suffixes = lib.listToAttrs (map (x: { name = x.name; value = joinRelative (lib.sublist level (lib.length x.components - level) x.components); }) deconstructed);
+    in
+    assert assertMsg (lib.isAttrs paths) "lib.path.commonAncestry: Expecting an attribute set as an argument but got: ${pretty paths}";
+    assert assertMsg (length deconstructed > 0) "lib.path.commonAncestry: No paths passed";
+    {
+      commonPrefix = root + ("/" + prefix);
+      relativePaths = suffixes;
+    };
+
+  # Returns the relative path string that leads from the path value `basePath`
+  # to the path value `subpath` ("." if they are equal).
+  # It is an error if `basePath` is not an ancestor of or equal to `subpath`
+  relativeTo = basePath: subpath:
+    assert assertMsg (isPath basePath) "lib.path.relativeTo: First argument ${pretty basePath} is not a path value";
+    assert assertMsg (isPath subpath) "lib.path.relativeTo: Second argument ${pretty subpath} is not a path value";
+    let common = commonAncestry { inherit basePath subpath; }; in
+    assert assertMsg (common.commonPrefix == basePath && common.relativePaths.basePath == ".")
+      "lib.path.relativeTo: First argument ${toString basePath} needs to be an ancestor of or equal to the second argument ${toString subpath}";
+    common.relativePaths.subpath;
 
 }