Fixed bugs with drive-relative paths and NTFS ADS paths in pathlib

python · Jun 13, 2019 · 15aadf9 · 15aadf9
1 parent 838f264
commit 15aadf9
Show file tree

Hide file tree

Showing 3 changed files with 89 additions and 30 deletions.
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
@@ -60,38 +60,49 @@ class _Flavour(object):
     def __init__(self):
         self.join = self.sep.join
 
+    def _split_part(self, part):
+        """
+        Return the drive, root and path parts from a given part.
+        If the part is a tuple, it already contains these values and therefore is returned.
+        Otherwise, splitroot is used to parse the part.
+        """
+        if isinstance(part, tuple):
+            return part
+        elif isinstance(part, str):
+            if self.altsep:
+                part = part.replace(self.altsep, self.sep)
+            drv, root, rel = self.splitroot(part)
+            return drv, root, rel.split(self.sep)
+        else:
+            raise TypeError(f'argument should be a tuple or an str object, not {type(part)}')
+
     def parse_parts(self, parts):
+        """
+        Parse and join multiple path strings, and
+        return a tuple of the final drive, root and path parts.
+        The given parts can be either strings of paths,
+        or tuples that represent paths, containing the drive, root and list of path parts.
+        The option for passing a tuple is needed, as the part 'a:b' could be interpreted
+        either as the relative path 'b' with the drive 'a:',
+        or as a file 'a' with the NTFS data-stream 'b'.
+        For example, passing either ('a:', '', ['b']) or ('', '', ['a:b']) instead of 'a:b'
+        will allow parse_parts to behave properly in these cases.
+        """
         parsed = []
-        sep = self.sep
-        altsep = self.altsep
         drv = root = ''
         it = reversed(parts)
         for part in it:
             if not part:
                 continue
-            if altsep:
-                part = part.replace(altsep, sep)
-            drv, root, rel = self.splitroot(part)
-            if sep in rel:
-                for x in reversed(rel.split(sep)):
+            current_drv, current_root, rel_parts = self._split_part(part)
+            if not drv:
+                drv = current_drv
+            if not root:
+                root = current_root
+                for x in reversed(rel_parts):
                     if x and x != '.':
                         parsed.append(sys.intern(x))
-            else:
-                if rel and rel != '.':
-                    parsed.append(sys.intern(rel))
-            if drv or root:
-                if not drv:
-                    # If no drive is present, try to find one in the previous
-                    # parts. This makes the result of parsing e.g.
-                    # ("C:", "/", "a") reasonably intuitive.
-                    for part in it:
-                        if not part:
-                            continue
-                        if altsep:
-                            part = part.replace(altsep, sep)
-                        drv = self.splitroot(part)[0]
-                        if drv:
-                            break
+            if root and drv:
                 break
         if drv or root:
             parsed.append(drv + root)
@@ -115,6 +126,9 @@ def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2):
             return drv, root, parts + parts2
         return drv2, root2, parts2
 
+    def has_drive(self, part):
+        return self.splitroot(part)[0] != ''
+
 
 class _WindowsFlavour(_Flavour):
     # Reference for Windows paths can be found at
@@ -191,18 +205,21 @@ def resolve(self, path, strict=False):
         s = str(path)
         if not s:
             return os.getcwd()
-        previous_s = None
         if _getfinalpathname is not None:
             if strict:
                 return self._ext_to_normal(_getfinalpathname(s))
             else:
+                previous_s = None
                 tail_parts = []  # End of the path after the first one not found
                 while True:
                     try:
                         s = self._ext_to_normal(_getfinalpathname(s))
                     except FileNotFoundError:
                         previous_s = s
                         s, tail = os.path.split(s)
+                        if self.has_drive(tail):
+                            # To avoid confusing between a filename with a data-stream and a drive letter
+                            tail = f'.{self.sep}{tail}'
                         tail_parts.append(tail)
                         if previous_s == s:
                             return path
@@ -646,7 +663,10 @@ def _parse_args(cls, args):
         parts = []
         for a in args:
             if isinstance(a, PurePath):
-                parts += a._parts
+                path_parts = a._parts
+                if a._drv or a._root:
+                    path_parts = path_parts[1:]
+                parts.append((a._drv, a._root, path_parts))
             else:
                 a = os.fspath(a)
                 if isinstance(a, str):
@@ -684,6 +704,10 @@ def _from_parsed_parts(cls, drv, root, parts, init=True):
 
     @classmethod
     def _format_parsed_parts(cls, drv, root, parts):
+        if parts and not drv and cls._flavour.has_drive(parts[0]):
+            # In case there is no drive, and the first part might be interpreted as a drive,
+            # we add a dot to clarify the first part is not a drive.
+            parts = ['.'] + parts
         if drv or root:
             return drv + root + cls._flavour.join(parts[1:])
         else:
@@ -910,7 +934,7 @@ def __truediv__(self, key):
         return self._make_child((key,))
 
     def __rtruediv__(self, key):
-        return self._from_parts([key] + self._parts)
+        return self._from_parts([key, self])
 
     @property
     def parent(self):
@@ -1138,7 +1162,7 @@ def absolute(self):
             return self
         # FIXME this must defer to the specific flavour (and, under Windows,
         # use nt._getfullpathname())
-        obj = self._from_parts([os.getcwd()] + self._parts, init=False)
+        obj = self._from_parts([os.getcwd(), self], init=False)
         obj._init(template=self)
         return obj
 
@@ -1507,7 +1531,7 @@ def expanduser(self):
         if (not (self._drv or self._root) and
             self._parts and self._parts[0][:1] == '~'):
             homedir = self._flavour.gethomedir(self._parts[0][1:])
-            return self._from_parts([homedir] + self._parts[1:])
+            return self._from_parts([homedir, self.relative_to(self._parts[0])])
 
         return self
 

diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
@@ -114,7 +114,6 @@ def test_parse_parts(self):
         check(['//a/b/'],               ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
         check(['//a/b/c'],              ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
         # Second part is anchored, so that the first part is ignored.
-        check(['a', 'Z:b', 'c'],        ('Z:', '', ['Z:', 'b', 'c']))
         check(['a', 'Z:/b', 'c'],       ('Z:', '\\', ['Z:\\', 'b', 'c']))
         # UNC paths.
         check(['a', '//b/c', 'd'],      ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
@@ -133,6 +132,16 @@ def test_parse_parts(self):
         check(['a', '/b', 'c'],         ('', '\\', ['\\', 'b', 'c']))
         check(['Z:/a', '/b', 'c'],      ('Z:', '\\', ['Z:\\', 'b', 'c']))
         check(['//?/Z:/a', '/b', 'c'],  ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
+        # Second part has a drive but not root.
+        check(['a', 'Z:b', 'c'],        ('Z:', '', ['Z:', 'a', 'b', 'c']))
+        check(['Y:a', 'Z:b', 'c'],      ('Z:', '', ['Z:', 'a', 'b', 'c']))
+        # Paths to files with NTFS alternate data streams
+        check(['./c:s'],                ('', '', ['c:s']))
+        check(['cc:s'],                 ('', '', ['cc:s']))
+        check(['C:c:s'],                ('C:', '', ['C:', 'c:s']))
+        check(['C:/c:s'],               ('C:', '\\', ['C:\\', 'c:s']))
+        check(['D:a', './c:b'],         ('D:', '', ['D:', 'a', 'c:b']))
+        check(['D:/a', './c:b'],        ('D:', '\\', ['D:\\', 'a', 'c:b']))
 
     def test_splitroot(self):
         f = self.flavour.splitroot
@@ -201,6 +210,7 @@ def test_constructor_common(self):
         self.assertEqual(P(P('a'), 'b'), P('a/b'))
         self.assertEqual(P(P('a'), P('b')), P('a/b'))
         self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
+        self.assertEqual(P(P('./a:b')), P('./a:b'))
 
     def _check_str_subclass(self, *args):
         # Issue #21127: it should be possible to construct a PurePath object
@@ -712,7 +722,9 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase):
 
     equivalences = _BasePurePathTest.equivalences.copy()
     equivalences.update({
-        'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a') ],
+        './a:b': [ ('./a:b',) ],
+        'a:b:c': [ ('./b:c', 'a:'), ('b:', 'a:b:c') ],
+        'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ],
         'c:/a': [
             ('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'),
             ('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'),
@@ -736,6 +748,7 @@ def test_str(self):
         self.assertEqual(str(p), '\\\\a\\b\\c\\d')
 
     def test_str_subclass(self):
+        self._check_str_subclass('.\\a:b')
         self._check_str_subclass('c:')
         self._check_str_subclass('c:a')
         self._check_str_subclass('c:a\\b.txt')
@@ -882,6 +895,7 @@ def test_drive(self):
         self.assertEqual(P('//a/b').drive, '\\\\a\\b')
         self.assertEqual(P('//a/b/').drive, '\\\\a\\b')
         self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b')
+        self.assertEqual(P('./c:a').drive, '')
 
     def test_root(self):
         P = self.cls
@@ -1104,6 +1118,14 @@ def test_join(self):
         self.assertEqual(pp, P('C:/a/b/x/y'))
         pp = p.joinpath('c:/x/y')
         self.assertEqual(pp, P('C:/x/y'))
+        # Joining with files with NTFS data streams => the filename should
+        # not be parsed as a drive letter
+        pp = p.joinpath(P('./d:s'))
+        self.assertEqual(pp, P('C:/a/b/d:s'))
+        pp = p.joinpath(P('./dd:s'))
+        self.assertEqual(pp, P('C:/a/b/dd:s'))
+        pp = p.joinpath(P('E:d:s'))
+        self.assertEqual(pp, P('E:d:s'))
 
     def test_div(self):
         # Basically the same as joinpath().
@@ -1124,6 +1146,11 @@ def test_div(self):
         # the second path is relative.
         self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y'))
         self.assertEqual(p / 'c:/x/y', P('C:/x/y'))
+        # Joining with files with NTFS data streams => the filename should
+        # not be parsed as a drive letter
+        self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s'))
+        self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s'))
+        self.assertEqual(p / P('E:d:s'), P('E:d:s'))
 
     def test_is_reserved(self):
         P = self.cls
@@ -1333,6 +1360,8 @@ def test_expanduser_common(self):
         self.assertEqual(p.expanduser(), p)
         p = P(P('').absolute().anchor) / '~'
         self.assertEqual(p.expanduser(), p)
+        p = P('~/a:b')
+        self.assertEqual(p.expanduser(), P(os.path.expanduser('~'), './a:b'))
 
     def test_exists(self):
         P = self.cls
@@ -2328,6 +2357,11 @@ def check():
             env['USERPROFILE'] = 'C:\\Users\\alice'
             check()
 
+    def test_resolve(self):
+        P = self.cls
+        p = P(BASE, './a:b')
+        self.assertEqual(str(p.resolve(strict=False)), f'{BASE}\\a:b')
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2019-03-15-22-50-27.bpo-36305.Pbkv6u.rst b/Misc/NEWS.d/next/Library/2019-03-15-22-50-27.bpo-36305.Pbkv6u.rst
@@ -0,0 +1 @@
+Fix a ``pathlib`` inconsistency in handling of paths containing colons.