Skip to content

Commit

Permalink
Implement, use, and test symlink resolution relative to a custom root…
Browse files Browse the repository at this point in the history
… directory
  • Loading branch information
hartwork committed Aug 6, 2022
1 parent 91de73f commit 97edaa0
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 34 deletions.
115 changes: 81 additions & 34 deletions src/distro/distro.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
"""

import argparse
import errno
import json
import logging
import os
Expand All @@ -38,11 +37,13 @@
import subprocess
import sys
import warnings
from string import ascii_letters
from typing import (
Any,
Callable,
Dict,
Iterable,
List,
Optional,
Sequence,
TextIO,
Expand Down Expand Up @@ -154,6 +155,84 @@ class InfoDict(TypedDict):
"iredmail-release",
)

# Whether we are running on Windows; used to enable the handling of
# drive letters (e.g. ``C:``) when resolving symlink targets.
_IS_WINDOWS = os.name == "nt"


def _is_drive_plus_colon(candidate: str) -> bool:
    """Return whether ``candidate`` is a drive letter plus colon, e.g. ``C:``."""
    if len(candidate) != 2:
        return False
    letter, colon = candidate
    return colon == ":" and letter in ascii_letters


def _realpath_with_root(path: str, root: str) -> str:
    """
    Remake of ``os.path.realpath`` (with the same default ``strict=False`` behavior)
    that not only resolves symlinks and dot-dot segments in ``path``
    but also supports a custom root (or chroot) directory ``root``.

    A custom implementation turned out to be needed, e.g. because a symlink to
    ``../../etc/os-release`` inside the chroot would be correct to resolve to
    ``<chroot>/etc/os-release`` rather than ``<chroot>/../../etc/os-release``.
    Where the resolution algorithm differs between Linux and Windows, this
    implementation is closer to what Linux would do.

    Both ``path`` and ``root`` are made absolute first.  Symlinks that are part
    of ``root`` itself are left unresolved.  If ``path`` is not located below
    ``root`` to begin with, the result of plain ``os.path.realpath(path)`` is
    returned.  Once a symlink loop is detected, the remaining segments are
    taken over as they are, without following any further symlinks.
    """
    abs_path = os.path.abspath(path)
    abs_root = os.path.abspath(root)

    input_segments = abs_path.split(os.sep)
    root_segments = abs_root.split(os.sep)
    if input_segments[: len(root_segments)] != root_segments:
        # Path ``abs_path`` is not inside chroot ``abs_root``, syntactically.
        return os.path.realpath(path)
    input_segments = input_segments[len(root_segments) :]  # noqa: E203 (for black)

    output_segments: List[str] = []
    symlinks_seen_before = set()
    keep_probing = True

    while input_segments:
        first_segment, *input_segments = input_segments  # i.e. pop first element

        if first_segment in ("", "."):
            continue

        if first_segment == "..":
            # Going up stops at the custom root, never above it
            if output_segments:
                output_segments.pop()
            continue

        file_under_test = os.sep.join(
            root_segments + output_segments + [first_segment]
        )

        if not keep_probing or not os.path.islink(file_under_test):
            output_segments.append(first_segment)
            continue

        # Only symlinks take part in loop detection: walking through the very
        # same regular directory more than once is legal and must not stop
        # us from resolving the symlinks that follow.
        if file_under_test in symlinks_seen_before:
            keep_probing = False
            output_segments.append(first_segment)
            continue
        symlinks_seen_before.add(file_under_test)

        # ``str.split`` always returns at least one element, so there is
        # a first segment to inspect below even for odd link texts.
        link_text = os.readlink(file_under_test)
        input_segments = link_text.split(os.sep) + input_segments

        # Reset output for absolute symlinks as needed
        if _IS_WINDOWS and _is_drive_plus_colon(input_segments[0]):
            output_segments = []
            input_segments = input_segments[1:]
        elif (
            _IS_WINDOWS
            and len(input_segments) >= 4
            and input_segments[:3] == ["", "", "?"]
            and _is_drive_plus_colon(input_segments[3])
        ):  # i.e. drive style UNC path
            output_segments = []
            input_segments = input_segments[4:]
        elif input_segments[0] == "":  # i.e. an absolute path
            output_segments = []

    return os.sep.join(root_segments + output_segments)


def linux_distribution(full_distribution_name: bool = True) -> Tuple[str, str, str]:
"""
Expand Down Expand Up @@ -1114,39 +1193,7 @@ def __resolve_chroot_symlink_as_needed(self, link_location: str) -> str:
if self.root_dir is None:
return link_location

if os.path.commonprefix([self.root_dir, link_location]) != self.root_dir:
raise FileNotFoundError

seen_paths = set()
while True:
try:
resolved = os.readlink(link_location)
except OSError: # includes case "not a symlink"
if os.path.commonprefix(
[
os.path.realpath(self.root_dir),
os.path.realpath(link_location),
]
) != os.path.realpath(self.root_dir):
# `link_location` resolves outside of `self.root_dir`.
raise FileNotFoundError from None

return link_location

if os.path.isabs(resolved):
# i.e. absolute path (regarding to the chroot), that we need to
# "move" back inside the chroot.
resolved = self.__abs_path_join(self.root_dir, resolved)
else:
# i.e. relative path that we make absolute
resolved = os.path.join(os.path.dirname(link_location), resolved)

# prevent symlinks infinite loop
if resolved in seen_paths:
raise OSError(errno.ELOOP, os.strerror(errno.ELOOP), resolved)

seen_paths.add(link_location)
link_location = resolved
return _realpath_with_root(link_location, root=self.root_dir)

@cached_property
def _os_release_info(self) -> Dict[str, str]:
Expand Down
141 changes: 141 additions & 0 deletions tests/test_distro.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,15 @@
import os
import subprocess
import sys
import unittest
from tempfile import TemporaryDirectory
from types import FunctionType
from typing import Any, Dict, List, NoReturn, Optional

import pytest

from distro.distro import _realpath_with_root

BASE = os.path.abspath(os.path.dirname(__file__))
RESOURCES = os.path.join(BASE, "resources")
DISTROS_DIR = os.path.join(RESOURCES, "distros")
Expand All @@ -32,6 +36,8 @@


IS_LINUX = sys.platform.startswith("linux")
IS_WINDOWS = os.name == "nt"

if IS_LINUX:
from distro import distro

Expand Down Expand Up @@ -2300,3 +2306,138 @@ def test_repr(self) -> None:
if attr in ("root_dir", "etc_dir", "usr_lib_dir"):
continue
assert f"{attr}=" in repr_str


class TestRealpathWithRoot(unittest.TestCase):
    """
    Tests for ``_realpath_with_root``, side by side with ``os.path.realpath``.

    Every test runs inside a fresh temporary directory that doubles as the
    custom root, creates symlinks in it and then compares what both functions
    make of them.
    """

    def setUp(self) -> None:
        tempdir = TemporaryDirectory()
        self.addCleanup(tempdir.cleanup)
        # The location of the temporary directory may itself involve symlinks
        # or abbreviated names (originally needed as a workaround on Windows).
        # ``os.path.realpath`` would expand those while ``_realpath_with_root``
        # leaves the root as is, so resolve the root once up front to keep
        # the expectations of both comparable in every test.
        self.root = os.path.realpath(tempdir.name)

    def _inside_root(self, *segments: str) -> str:
        """Return the path made of ``segments`` below the temporary root."""
        return os.path.join(self.root, *segments)

    def _symlink(self, name: str, target: str) -> str:
        """Create symlink ``name`` in the root pointing to ``target``."""
        location = self._inside_root(name)
        os.symlink(target, location)
        return location

    def _resolve(self, path: str) -> str:
        """Resolve ``path`` with the temporary directory as the custom root."""
        return _realpath_with_root(path, root=self.root)

    def test_multiple_levels(self) -> None:
        dst = self._symlink("foo", "bar")
        self._symlink("bar", "baz")
        expected = self._inside_root("baz")
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_no_separators_no_dots(self) -> None:
        dst = self._symlink("bar", "foo")
        expected = self._inside_root("foo")
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_leading_dots(self) -> None:
        dst = self._symlink("bar", "././foo".replace("/", os.sep))
        expected = self._inside_root("foo")
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    @pytest.mark.skipif(not IS_WINDOWS, reason="Irrelevant on non-windows")
    def test_leading_drive(self) -> None:
        dst = self._symlink("bar", r"C:\..\foo")
        self.assertEqual(os.path.realpath(dst), r"C:\foo")
        self.assertEqual(self._resolve(dst), self._inside_root("foo"))

    def test_multiple_trailing_separators(self) -> None:
        dst = self._symlink("bar", "foo" + 3 * os.sep)
        expected = self._inside_root("foo")
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_root_single_separator(self) -> None:
        dst = self._symlink("bar", os.sep)
        if IS_WINDOWS:
            drive_plus_colon = os.path.splitdrive(self.root)[0]
            expected_without_root = drive_plus_colon + os.sep
        else:
            expected_without_root = "/"
        self.assertEqual(os.path.realpath(dst), expected_without_root)
        self.assertEqual(self._resolve(dst), self.root)

    @pytest.mark.skipif(not IS_LINUX, reason=r"os.readlink messes with \\\ on Windows")
    def test_root_multiple_separators(self) -> None:
        dst = self._symlink("bar", 3 * os.sep)
        # On Windows we would get:
        # FileNotFoundError: [WinError 161] The specified path is invalid
        self.assertEqual(os.path.realpath(dst), "/")
        # On Windows, ``os.readlink`` interprets ``\\\`` and returns ``\\?\UNC``
        self.assertEqual(self._resolve(dst), self.root)

    def test_beyond_root(self) -> None:
        dst = self._symlink(
            "bar", "../../../../../../../etc/passwd".replace("/", os.sep)
        )
        if not IS_WINDOWS:
            # On Windows we would get OSError: [WinError 4392] The data present
            # in the reparse point buffer is invalid
            self.assertEqual(os.path.realpath(dst), "/etc/passwd")
        self.assertEqual(self._resolve(dst), self._inside_root("etc", "passwd"))

    def test_loop_simple(self) -> None:
        dst = self._symlink("bar", "bar")
        expected = dst
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_loop_nested(self) -> None:
        dst = self._symlink("bar", "bar/foo/baz".replace("/", os.sep))
        expected = self._inside_root("bar", "foo", "baz")
        if not IS_WINDOWS:
            # On Windows, what comes after ``bar`` is cut off
            self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_loop_delayed(self) -> None:
        dst = self._symlink("bar", "loop")
        self._symlink("loop", "loop")
        expected = self._inside_root("loop")
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_loop_delayed_pair(self) -> None:
        dst = self._symlink("bar", "loop1")
        self._symlink("loop1", "loop2")
        self._symlink("loop2", "loop1")
        expected = self._inside_root("loop1")
        self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

    def test_loop_inner(self) -> None:
        dst = self._symlink("foo", "loop/../bar".replace("/", os.sep))
        self._symlink("bar", "baz")
        self._symlink("loop", "loop")
        expected = self._inside_root("bar")  # i.e. without following "bar"
        if not IS_WINDOWS:
            # On Windows, symlink ``bar`` would be followed into ``baz``
            self.assertEqual(os.path.realpath(dst), expected)
        self.assertEqual(self._resolve(dst), expected)

0 comments on commit 97edaa0

Please sign in to comment.