Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-89973: Fix re.error in the fnmatch module. #93072

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def translate(pat):
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
if '-' not in stuff:
stuff = stuff.replace('\\', r'\\')
else:
chunks = []
Expand All @@ -114,19 +114,35 @@ def translate(pat):
chunks.append(pat[i:k])
i = k+1
k = k+3
chunks.append(pat[i:j])
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += '-'
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks)-1, 0, -1):
if chunks[k-1][-1] > chunks[k][0]:
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
for s in chunks)
# Escape set operations (&&, ~~ and ||).
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
if not stuff:
# Empty range: never match.
add('(?!)')
elif stuff == '!':
# Negated empty range: match any character.
add('.')
else:
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
add(f'[{stuff}]')
else:
add(re.escape(c))
assert i == n
Expand Down
114 changes: 114 additions & 0 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import unittest
import os
import string
import warnings

from fnmatch import fnmatch, fnmatchcase, translate, filter
Expand Down Expand Up @@ -91,6 +92,119 @@ def test_sep(self):
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr\\bin')

def test_char_set(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[az]', c in 'az')
check(c, '[!az]', c not in 'az')
# Case insensitive.
for c in tescases:
check(c, '[AZ]', (c in 'az') and ignorecase)
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
for c in string.ascii_uppercase:
check(c, '[az]', (c in 'AZ') and ignorecase)
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
# Repeated same character.
for c in tescases:
check(c, '[aa]', c == 'a')
# Special cases.
for c in tescases:
check(c, '[^az]', c in '^az')
check(c, '[[az]', c in '[az')
check(c, r'[!]]', c != ']')
check('[', '[')
check('[]', '[]')
check('[!', '[!')
check('[!]', '[!]')

def test_range(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[b-d]', c in 'bcd')
check(c, '[!b-d]', c not in 'bcd')
check(c, '[b-dx-z]', c in 'bcdxyz')
check(c, '[!b-dx-z]', c not in 'bcdxyz')
# Case insensitive.
for c in tescases:
check(c, '[B-D]', (c in 'bcd') and ignorecase)
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
for c in string.ascii_uppercase:
check(c, '[b-d]', (c in 'BCD') and ignorecase)
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
# Upper bound == lower bound.
for c in tescases:
check(c, '[b-b]', c == 'b')
# Special cases.
for c in tescases:
check(c, '[!-#]', c not in '-#')
check(c, '[!--.]', c not in '-.')
check(c, '[^-`]', c in '^_`')
if not (normsep and c == '/'):
check(c, '[[-^]', c in r'[\]^')
check(c, r'[\-^]', c in r'\]^')
check(c, '[b-]', c in '-b')
check(c, '[!b-]', c not in '-b')
check(c, '[-b]', c in '-b')
check(c, '[!-b]', c not in '-b')
check(c, '[-]', c in '-')
check(c, '[!-]', c not in '-')
# Upper bound is less that lower bound: error in RE.
for c in tescases:
check(c, '[d-b]', False)
check(c, '[!d-b]', True)
check(c, '[d-bx-z]', c in 'xyz')
check(c, '[!d-bx-z]', c not in 'xyz')
check(c, '[d-b^-`]', c in '^_`')
if not (normsep and c == '/'):
check(c, '[d-b[-^]', c in r'[\]^')

def test_sep_in_char_set(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('/', r'[/]')
check('\\', r'[\]')
check('/', r'[\]', normsep)
check('\\', r'[/]', normsep)
check('[/]', r'[/]', False)
check(r'[\\]', r'[/]', False)
check('\\', r'[\t]')
check('/', r'[\t]', normsep)
check('t', r'[\t]')
check('\t', r'[\t]', False)

def test_sep_in_range(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('a/b', 'a[.-0]b', not normsep)
check('a\\b', 'a[.-0]b', False)
check('a\\b', 'a[Z-^]b', not normsep)
check('a/b', 'a[Z-^]b', False)

check('a/b', 'a[/-0]b', not normsep)
check(r'a\b', 'a[/-0]b', False)
check('a[/-0]b', 'a[/-0]b', False)
check(r'a[\-0]b', 'a[/-0]b', False)

check('a/b', 'a[.-/]b')
check(r'a\b', 'a[.-/]b', normsep)
check('a[.-/]b', 'a[.-/]b', False)
check(r'a[.-\]b', 'a[.-/]b', False)

check(r'a\b', r'a[\-^]b')
check('a/b', r'a[\-^]b', normsep)
check(r'a[\-^]b', r'a[\-^]b', False)
check('a[/-^]b', r'a[\-^]b', False)

check(r'a\b', r'a[Z-\]b', not normsep)
check('a/b', r'a[Z-\]b', False)
check(r'a[Z-\]b', r'a[Z-\]b', False)
check('a[Z-/]b', r'a[Z-\]b', False)

def test_warnings(self):
with warnings.catch_warnings():
warnings.simplefilter('error', Warning)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
character range with upper bound lower than lower bound (e.g. ``[c-a]``).
Now such ranges are interpreted as empty ranges.