Skip to content

Commit

Permalink
Fixed bugs with drive-relative paths and NTFS ADS paths in pathlib
Browse files Browse the repository at this point in the history
  • Loading branch information
kmaork committed Jun 13, 2019
1 parent 838f264 commit 15aadf9
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 30 deletions.
80 changes: 52 additions & 28 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,38 +60,49 @@ class _Flavour(object):
def __init__(self):
self.join = self.sep.join

def _split_part(self, part):
"""
Return the drive, root and path parts from a given part.
If the part is a tuple, it already contains these values and therefore is returned.
Otherwise, splitroot is used to parse the part.
"""
if isinstance(part, tuple):
return part
elif isinstance(part, str):
if self.altsep:
part = part.replace(self.altsep, self.sep)
drv, root, rel = self.splitroot(part)
return drv, root, rel.split(self.sep)
else:
raise TypeError(f'argument should be a tuple or an str object, not {type(part)}')

def parse_parts(self, parts):
"""
Parse and join multiple path strings, and
return a tuple of the final drive, root and path parts.
The given parts can be either strings of paths,
or tuples that represent paths, containing the drive, root and list of path parts.
The option for passing a tuple is needed, as the part 'a:b' could be interpreted
either as the relative path 'b' with the drive 'a:',
or as a file 'a' with the NTFS data-stream 'b'.
For example, passing either ('a:', '', ['b']) or ('', '', ['a:b']) instead of 'a:b'
will allow parse_parts to behave properly in these cases.
"""
parsed = []
sep = self.sep
altsep = self.altsep
drv = root = ''
it = reversed(parts)
for part in it:
if not part:
continue
if altsep:
part = part.replace(altsep, sep)
drv, root, rel = self.splitroot(part)
if sep in rel:
for x in reversed(rel.split(sep)):
current_drv, current_root, rel_parts = self._split_part(part)
if not drv:
drv = current_drv
if not root:
root = current_root
for x in reversed(rel_parts):
if x and x != '.':
parsed.append(sys.intern(x))
else:
if rel and rel != '.':
parsed.append(sys.intern(rel))
if drv or root:
if not drv:
# If no drive is present, try to find one in the previous
# parts. This makes the result of parsing e.g.
# ("C:", "/", "a") reasonably intuitive.
for part in it:
if not part:
continue
if altsep:
part = part.replace(altsep, sep)
drv = self.splitroot(part)[0]
if drv:
break
if root and drv:
break
if drv or root:
parsed.append(drv + root)
Expand All @@ -115,6 +126,9 @@ def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2):
return drv, root, parts + parts2
return drv2, root2, parts2

def has_drive(self, part):
return self.splitroot(part)[0] != ''


class _WindowsFlavour(_Flavour):
# Reference for Windows paths can be found at
Expand Down Expand Up @@ -191,18 +205,21 @@ def resolve(self, path, strict=False):
s = str(path)
if not s:
return os.getcwd()
previous_s = None
if _getfinalpathname is not None:
if strict:
return self._ext_to_normal(_getfinalpathname(s))
else:
previous_s = None
tail_parts = [] # End of the path after the first one not found
while True:
try:
s = self._ext_to_normal(_getfinalpathname(s))
except FileNotFoundError:
previous_s = s
s, tail = os.path.split(s)
if self.has_drive(tail):
# To avoid confusing between a filename with a data-stream and a drive letter
tail = f'.{self.sep}{tail}'
tail_parts.append(tail)
if previous_s == s:
return path
Expand Down Expand Up @@ -646,7 +663,10 @@ def _parse_args(cls, args):
parts = []
for a in args:
if isinstance(a, PurePath):
parts += a._parts
path_parts = a._parts
if a._drv or a._root:
path_parts = path_parts[1:]
parts.append((a._drv, a._root, path_parts))
else:
a = os.fspath(a)
if isinstance(a, str):
Expand Down Expand Up @@ -684,6 +704,10 @@ def _from_parsed_parts(cls, drv, root, parts, init=True):

@classmethod
def _format_parsed_parts(cls, drv, root, parts):
if parts and not drv and cls._flavour.has_drive(parts[0]):
# In case there is no drive, and the first part might be interpreted as a drive,
# we add a dot to clarify the first part is not a drive.
parts = ['.'] + parts
if drv or root:
return drv + root + cls._flavour.join(parts[1:])
else:
Expand Down Expand Up @@ -910,7 +934,7 @@ def __truediv__(self, key):
return self._make_child((key,))

def __rtruediv__(self, key):
return self._from_parts([key] + self._parts)
return self._from_parts([key, self])

@property
def parent(self):
Expand Down Expand Up @@ -1138,7 +1162,7 @@ def absolute(self):
return self
# FIXME this must defer to the specific flavour (and, under Windows,
# use nt._getfullpathname())
obj = self._from_parts([os.getcwd()] + self._parts, init=False)
obj = self._from_parts([os.getcwd(), self], init=False)
obj._init(template=self)
return obj

Expand Down Expand Up @@ -1507,7 +1531,7 @@ def expanduser(self):
if (not (self._drv or self._root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.gethomedir(self._parts[0][1:])
return self._from_parts([homedir] + self._parts[1:])
return self._from_parts([homedir, self.relative_to(self._parts[0])])

return self

Expand Down
38 changes: 36 additions & 2 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ def test_parse_parts(self):
check(['//a/b/'], ('\\\\a\\b', '\\', ['\\\\a\\b\\']))
check(['//a/b/c'], ('\\\\a\\b', '\\', ['\\\\a\\b\\', 'c']))
# Second part is anchored, so that the first part is ignored.
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'b', 'c']))
check(['a', 'Z:/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
# UNC paths.
check(['a', '//b/c', 'd'], ('\\\\b\\c', '\\', ['\\\\b\\c\\', 'd']))
Expand All @@ -133,6 +132,16 @@ def test_parse_parts(self):
check(['a', '/b', 'c'], ('', '\\', ['\\', 'b', 'c']))
check(['Z:/a', '/b', 'c'], ('Z:', '\\', ['Z:\\', 'b', 'c']))
check(['//?/Z:/a', '/b', 'c'], ('\\\\?\\Z:', '\\', ['\\\\?\\Z:\\', 'b', 'c']))
# Second part has a drive but not root.
check(['a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c']))
check(['Y:a', 'Z:b', 'c'], ('Z:', '', ['Z:', 'a', 'b', 'c']))
# Paths to files with NTFS alternate data streams
check(['./c:s'], ('', '', ['c:s']))
check(['cc:s'], ('', '', ['cc:s']))
check(['C:c:s'], ('C:', '', ['C:', 'c:s']))
check(['C:/c:s'], ('C:', '\\', ['C:\\', 'c:s']))
check(['D:a', './c:b'], ('D:', '', ['D:', 'a', 'c:b']))
check(['D:/a', './c:b'], ('D:', '\\', ['D:\\', 'a', 'c:b']))

def test_splitroot(self):
f = self.flavour.splitroot
Expand Down Expand Up @@ -201,6 +210,7 @@ def test_constructor_common(self):
self.assertEqual(P(P('a'), 'b'), P('a/b'))
self.assertEqual(P(P('a'), P('b')), P('a/b'))
self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
self.assertEqual(P(P('./a:b')), P('./a:b'))

def _check_str_subclass(self, *args):
# Issue #21127: it should be possible to construct a PurePath object
Expand Down Expand Up @@ -712,7 +722,9 @@ class PureWindowsPathTest(_BasePurePathTest, unittest.TestCase):

equivalences = _BasePurePathTest.equivalences.copy()
equivalences.update({
'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('/', 'c:', 'a') ],
'./a:b': [ ('./a:b',) ],
'a:b:c': [ ('./b:c', 'a:'), ('b:', 'a:b:c') ],
'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ],
'c:/a': [
('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'),
('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'),
Expand All @@ -736,6 +748,7 @@ def test_str(self):
self.assertEqual(str(p), '\\\\a\\b\\c\\d')

def test_str_subclass(self):
self._check_str_subclass('.\\a:b')
self._check_str_subclass('c:')
self._check_str_subclass('c:a')
self._check_str_subclass('c:a\\b.txt')
Expand Down Expand Up @@ -882,6 +895,7 @@ def test_drive(self):
self.assertEqual(P('//a/b').drive, '\\\\a\\b')
self.assertEqual(P('//a/b/').drive, '\\\\a\\b')
self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b')
self.assertEqual(P('./c:a').drive, '')

def test_root(self):
P = self.cls
Expand Down Expand Up @@ -1104,6 +1118,14 @@ def test_join(self):
self.assertEqual(pp, P('C:/a/b/x/y'))
pp = p.joinpath('c:/x/y')
self.assertEqual(pp, P('C:/x/y'))
# Joining with files with NTFS data streams => the filename should
# not be parsed as a drive letter
pp = p.joinpath(P('./d:s'))
self.assertEqual(pp, P('C:/a/b/d:s'))
pp = p.joinpath(P('./dd:s'))
self.assertEqual(pp, P('C:/a/b/dd:s'))
pp = p.joinpath(P('E:d:s'))
self.assertEqual(pp, P('E:d:s'))

def test_div(self):
# Basically the same as joinpath().
Expand All @@ -1124,6 +1146,11 @@ def test_div(self):
# the second path is relative.
self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y'))
self.assertEqual(p / 'c:/x/y', P('C:/x/y'))
# Joining with files with NTFS data streams => the filename should
# not be parsed as a drive letter
self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s'))
self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s'))
self.assertEqual(p / P('E:d:s'), P('E:d:s'))

def test_is_reserved(self):
P = self.cls
Expand Down Expand Up @@ -1333,6 +1360,8 @@ def test_expanduser_common(self):
self.assertEqual(p.expanduser(), p)
p = P(P('').absolute().anchor) / '~'
self.assertEqual(p.expanduser(), p)
p = P('~/a:b')
self.assertEqual(p.expanduser(), P(os.path.expanduser('~'), './a:b'))

def test_exists(self):
P = self.cls
Expand Down Expand Up @@ -2328,6 +2357,11 @@ def check():
env['USERPROFILE'] = 'C:\\Users\\alice'
check()

def test_resolve(self):
P = self.cls
p = P(BASE, './a:b')
self.assertEqual(str(p.resolve(strict=False)), f'{BASE}\\a:b')


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a ``pathlib`` inconsistency in handling of paths containing colons.

0 comments on commit 15aadf9

Please sign in to comment.