From c94488e79445f5c45d748423e2503e7b4608049d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 27 Aug 2024 17:10:30 -0400 Subject: [PATCH 1/6] [3.10] [3.11] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) Applies changes from zipp 3.20.1 and jaraco/zippGH-124 (cherry picked from commit 2231286d78d328c2f575e0b05b16fe447d1656d6) (cherry picked from commit 17b77bb) Co-authored-by: Jason R. Coombs --- Lib/test/test_zipfile.py | 74 +++++++++++++++++-- Lib/zipfile.py | 69 ++--------------- ...-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst | 3 + 3 files changed, 77 insertions(+), 69 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index a60dc11688d20b..b08ced8b936a2c 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3282,20 +3282,78 @@ def test_extract_orig_with_implied_dirs(self, alpharep): def test_malformed_paths(self): """ - Path should handle malformed paths. + Path should handle malformed paths gracefully. + + Paths with leading slashes are not visible. + + Paths with dots are treated like regular files. """ data = io.BytesIO() zf = zipfile.ZipFile(data, "w") - zf.writestr("/one-slash.txt", b"content") - zf.writestr("//two-slash.txt", b"content") zf.writestr("../parent.txt", b"content") zf.filename = '' root = zipfile.Path(zf) - assert list(map(str, root.iterdir())) == [ - 'one-slash.txt', - 'two-slash.txt', - 'parent.txt', - ] + assert list(map(str, root.iterdir())) == ['../'] + assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' + + def test_unsupported_names(self): + """ + Path segments with special characters are readable. + + On some platforms or file systems, characters like + ``:`` and ``?`` are not allowed, but they are valid + in the zip file. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("path?", b"content") + zf.writestr("V: NMS.flac", b"fLaC...") + zf.filename = '' + root = zipfile.Path(zf) + contents = root.iterdir() + assert next(contents).name == 'path?' + assert next(contents).name == 'V: NMS.flac' + assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." + + def test_backslash_not_separator(self): + """ + In a zip file, backslashes are not separators. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") + zf.filename = '' + root = zipfile.Path(zf) + (first,) = root.iterdir() + assert not first.is_dir() + assert first.name == 'foo\\bar' + + +class DirtyZipInfo(zipfile.ZipInfo): + """ + Bypass name sanitization. + """ + + def __init__(self, filename, *args, **kwargs): + super().__init__(filename, *args, **kwargs) + self.filename = filename + + @classmethod + def for_name(cls, name, archive): + """ + Construct the same way that ZipFile.writestr does. + + TODO: extract this functionality and re-use + """ + self = cls(filename=name, date_time=time.localtime(time.time())[:6]) + self.compress_type = archive.compression + self.compress_level = archive.compresslevel + if self.filename.endswith('/'): # pragma: no cover + self.external_attr = 0o40775 << 16 # drwxrwxr-x + self.external_attr |= 0x10 # MS-DOS directory flag + else: + self.external_attr = 0o600 << 16 # ?rw------- + return self class StripExtraTests(unittest.TestCase): diff --git a/Lib/zipfile.py b/Lib/zipfile.py index cbac8d9160e72b..9b66a9f054dc6b 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -2152,7 +2152,7 @@ def _parents(path): def _ancestry(path): """ Given a path with elements separated by - posixpath.sep, generate all elements of that path + posixpath.sep, generate all elements of that path. >>> list(_ancestry('b/d')) ['b/d', 'b'] @@ -2164,9 +2164,14 @@ def _ancestry(path): ['b'] >>> list(_ancestry('')) [] + + Multiple separators are treated like a single. + + >>> list(_ancestry('//b//d///f//')) + ['//b//d///f', '//b//d', '//b'] """ path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: + while path.rstrip(posixpath.sep): yield path path, tail = posixpath.split(path) @@ -2183,65 +2188,7 @@ def _difference(minuend, subtrahend): return itertools.filterfalse(set(subtrahend).__contains__, minuend) -class SanitizedNames: - """ - ZipFile mix-in to ensure names are sanitized. - """ - - def namelist(self): - return list(map(self._sanitize, super().namelist())) - - @staticmethod - def _sanitize(name): - r""" - Ensure a relative path with posix separators and no dot names. - Modeled after - https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 - but provides consistent cross-platform behavior. - >>> san = SanitizedNames._sanitize - >>> san('/foo/bar') - 'foo/bar' - >>> san('//foo.txt') - 'foo.txt' - >>> san('foo/.././bar.txt') - 'foo/bar.txt' - >>> san('foo../.bar.txt') - 'foo../.bar.txt' - >>> san('\\foo\\bar.txt') - 'foo/bar.txt' - >>> san('D:\\foo.txt') - 'D/foo.txt' - >>> san('\\\\server\\share\\file.txt') - 'server/share/file.txt' - >>> san('\\\\?\\GLOBALROOT\\Volume3') - '?/GLOBALROOT/Volume3' - >>> san('\\\\.\\PhysicalDrive1\\root') - 'PhysicalDrive1/root' - Retain any trailing slash. - >>> san('abc/') - 'abc/' - Raises a ValueError if the result is empty. - >>> san('../..') - Traceback (most recent call last): - ... - ValueError: Empty filename - """ - - def allowed(part): - return part and part not in {'..', '.'} - - # Remove the drive letter. - # Don't use ntpath.splitdrive, because that also strips UNC paths - bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) - clean = bare.replace('\\', '/') - parts = clean.split('/') - joined = '/'.join(filter(allowed, parts)) - if not joined: - raise ValueError("Empty filename") - return joined + '/' * name.endswith('/') - - -class CompleteDirs(SanitizedNames, ZipFile): +class CompleteDirs(ZipFile): """ A ZipFile subclass that ensures that implied directories are always included in the namelist. diff --git a/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst new file mode 100644 index 00000000000000..ee9fde6a9ed87a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-26-13-45-20.gh-issue-123270.gXHvNJ.rst @@ -0,0 +1,3 @@ +Applied a more surgical fix for malformed payloads in :class:`zipfile.Path` +causing infinite loops (gh-122905) without breaking contents using +legitimate characters. From 87ac08dcc5dbea0330d0b6b79736c7d5cadd12bd Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 2 Sep 2024 09:49:26 -0400 Subject: [PATCH 2/6] Add some context to the assertion for troubleshooting. --- Lib/test/test_zipfile.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index b08ced8b936a2c..9462e6da2b7192 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3312,7 +3312,8 @@ def test_unsupported_names(self): root = zipfile.Path(zf) contents = root.iterdir() assert next(contents).name == 'path?' - assert next(contents).name == 'V: NMS.flac' + item = next(contents) + assert item.name == 'V: NMS.flac', item.name assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." def test_backslash_not_separator(self): @@ -3326,7 +3327,7 @@ def test_backslash_not_separator(self): root = zipfile.Path(zf) (first,) = root.iterdir() assert not first.is_dir() - assert first.name == 'foo\\bar' + assert first.name == 'foo\\bar', first.name class DirtyZipInfo(zipfile.ZipInfo): From d1eca1e5799f08bc3e9c3adc6e50f0c8dbfc444d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 2 Sep 2024 10:09:02 -0400 Subject: [PATCH 3/6] Remove tests not relevant to the backported security fix. Ref https://github.com/python/cpython/pull/123426#issuecomment-2324839726 --- Lib/test/test_zipfile.py | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 9462e6da2b7192..0fc2a6e45e73ce 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3296,39 +3296,6 @@ def test_malformed_paths(self): assert list(map(str, root.iterdir())) == ['../'] assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' - def test_unsupported_names(self): - """ - Path segments with special characters are readable. - - On some platforms or file systems, characters like - ``:`` and ``?`` are not allowed, but they are valid - in the zip file. - """ - data = io.BytesIO() - zf = zipfile.ZipFile(data, "w") - zf.writestr("path?", b"content") - zf.writestr("V: NMS.flac", b"fLaC...") - zf.filename = '' - root = zipfile.Path(zf) - contents = root.iterdir() - assert next(contents).name == 'path?' - item = next(contents) - assert item.name == 'V: NMS.flac', item.name - assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." - - def test_backslash_not_separator(self): - """ - In a zip file, backslashes are not separators. - """ - data = io.BytesIO() - zf = zipfile.ZipFile(data, "w") - zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") - zf.filename = '' - root = zipfile.Path(zf) - (first,) = root.iterdir() - assert not first.is_dir() - assert first.name == 'foo\\bar', first.name - class DirtyZipInfo(zipfile.ZipInfo): """ From d5c243c9aaf1b82f40024b8473067797061c934f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 2 Sep 2024 15:46:28 -0400 Subject: [PATCH 4/6] Restore the slash-prefixed paths in the malformed_paths test. --- Lib/test/test_zipfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 0fc2a6e45e73ce..53074b43cb56d0 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3290,6 +3290,8 @@ def test_malformed_paths(self): """ data = io.BytesIO() zf = zipfile.ZipFile(data, "w") + zf.writestr("/one-slash.txt", b"content") + zf.writestr("//two-slash.txt", b"content") zf.writestr("../parent.txt", b"content") zf.filename = '' root = zipfile.Path(zf) From ee0d35f248cad46cd2a839ddc494616eb83f4180 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 4 Sep 2024 13:37:39 -0400 Subject: [PATCH 5/6] Revert "Remove tests not relevant to the backported security fix." This reverts commit d1eca1e5799f08bc3e9c3adc6e50f0c8dbfc444d. --- Lib/test/test_zipfile.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 53074b43cb56d0..b4f71a8b3f64df 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -3298,6 +3298,39 @@ def test_malformed_paths(self): assert list(map(str, root.iterdir())) == ['../'] assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' + def test_unsupported_names(self): + """ + Path segments with special characters are readable. + + On some platforms or file systems, characters like + ``:`` and ``?`` are not allowed, but they are valid + in the zip file. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("path?", b"content") + zf.writestr("V: NMS.flac", b"fLaC...") + zf.filename = '' + root = zipfile.Path(zf) + contents = root.iterdir() + assert next(contents).name == 'path?' + item = next(contents) + assert item.name == 'V: NMS.flac', item.name + assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." + + def test_backslash_not_separator(self): + """ + In a zip file, backslashes are not separators. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") + zf.filename = '' + root = zipfile.Path(zf) + (first,) = root.iterdir() + assert not first.is_dir() + assert first.name == 'foo\\bar', first.name + class DirtyZipInfo(zipfile.ZipInfo): """ From 45a792751b93181390b8709cdbeb449983bf4753 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Wed, 4 Sep 2024 14:00:08 -0400 Subject: [PATCH 6/6] Skip failing tests, known to fail, referencing tracking bug. --- Lib/test/test_zipfile.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index b4f71a8b3f64df..33e5dfc61c5e73 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -5,6 +5,7 @@ import itertools import os import pathlib +import platform import posixpath import string import struct @@ -3298,6 +3299,7 @@ def test_malformed_paths(self): assert list(map(str, root.iterdir())) == ['../'] assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' + @unittest.skipIf(platform.system() == "Windows", "GH-123693") def test_unsupported_names(self): """ Path segments with special characters are readable. @@ -3318,6 +3320,7 @@ def test_unsupported_names(self): assert item.name == 'V: NMS.flac', item.name assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." + @unittest.skipIf(platform.system() == "Windows", "GH-123693") def test_backslash_not_separator(self): """ In a zip file, backslashes are not separators.