From a255b2ffd3e3be6a8070d2605ec50f977186ce5e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Oct 2024 11:40:02 +0300 Subject: [PATCH] gh-53203: Improve tests for strptime() (GH-125090) Run them with different locales and different date and time. Add the @run_with_locales() decorator to run the test with multiple locales. Improve the run_with_locale() context manager/decorator -- it now catches only expected exceptions and reports the test as skipped if no appropriate locale is available. (cherry picked from commit 19984fe024bfd90649f1c36b78c9abf3ed72b27d) Co-authored-by: Serhiy Storchaka --- Lib/test/pickletester.py | 4 +- Lib/test/support/__init__.py | 53 +++++++++- Lib/test/test_codecs.py | 9 +- Lib/test/test_decimal.py | 2 +- Lib/test/test_float.py | 2 +- Lib/test/test_imaplib.py | 2 +- Lib/test/test_str.py | 2 +- Lib/test/test_strptime.py | 195 +++++++++++++++++++++++++---------- Lib/test/test_time.py | 12 +-- Lib/test/test_types.py | 4 +- 10 files changed, 200 insertions(+), 85 deletions(-) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 38b87c42ca5ee0..f216136fa0f26d 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -26,7 +26,7 @@ from test import support from test.support import os_helper from test.support import ( - TestFailed, run_with_locale, no_tracing, + TestFailed, run_with_locales, no_tracing, _2G, _4G, bigmemtest ) from test.support.import_helper import forget @@ -2589,7 +2589,7 @@ def test_float(self): got = self.loads(pickle) self.assert_is_copy(value, got) - @run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @run_with_locales('LC_ALL', 'de_DE', 'fr_FR', '') def test_float_format(self): # make sure that floats are formatted locale independent with proto 0 self.assertEqual(self.dumps(1.2, 0)[0:3], b'F1.') diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index ed23f73e3cfd6d..057b3bbd22944f 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -921,8 +921,8 @@ def check_sizeof(test, o, size): test.assertEqual(result, size, msg) #======================================================================= -# Decorator for running a function in a different locale, correctly resetting -# it afterwards. +# Decorator/context manager for running a code in a different locale, +# correctly resetting it afterwards. @contextlib.contextmanager def run_with_locale(catstr, *locales): @@ -933,16 +933,21 @@ def run_with_locale(catstr, *locales): except AttributeError: # if the test author gives us an invalid category string raise - except: + except Exception: # cannot retrieve original locale, so do nothing locale = orig_locale = None + if '' not in locales: + raise unittest.SkipTest('no locales') else: for loc in locales: try: locale.setlocale(category, loc) break - except: + except locale.Error: pass + else: + if '' not in locales: + raise unittest.SkipTest(f'no locales {locales}') try: yield @@ -950,6 +955,46 @@ def run_with_locale(catstr, *locales): if locale and orig_locale: locale.setlocale(category, orig_locale) +#======================================================================= +# Decorator for running a function in multiple locales (if they are +# availasble) and resetting the original locale afterwards. + +def run_with_locales(catstr, *locales): + def deco(func): + @functools.wraps(func) + def wrapper(self, /, *args, **kwargs): + dry_run = '' in locales + try: + import locale + category = getattr(locale, catstr) + orig_locale = locale.setlocale(category) + except AttributeError: + # if the test author gives us an invalid category string + raise + except Exception: + # cannot retrieve original locale, so do nothing + pass + else: + try: + for loc in locales: + with self.subTest(locale=loc): + try: + locale.setlocale(category, loc) + except locale.Error: + self.skipTest(f'no locale {loc!r}') + else: + dry_run = False + func(self, *args, **kwargs) + finally: + locale.setlocale(category, orig_locale) + if dry_run: + # no locales available, so just run the test + # with the current locale + with self.subTest(locale=None): + func(self, *args, **kwargs) + return wrapper + return deco + #======================================================================= # Decorator for running a function in a specific timezone, correctly # resetting it afterwards. diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index e05b95c2d60bad..d539a86ec262eb 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -2,7 +2,6 @@ import contextlib import copy import io -import locale import pickle import sys import unittest @@ -1812,16 +1811,10 @@ def test_getwriter(self): self.assertRaises(TypeError, codecs.getwriter) self.assertRaises(LookupError, codecs.getwriter, "__spam__") + @support.run_with_locale('LC_CTYPE', 'tr_TR') def test_lookup_issue1813(self): # Issue #1813: under Turkish locales, lookup of some codecs failed # because 'I' is lowercased as "ı" (dotless i) - oldlocale = locale.setlocale(locale.LC_CTYPE) - self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale) - try: - locale.setlocale(locale.LC_CTYPE, 'tr_TR') - except locale.Error: - # Unsupported locale on this system - self.skipTest('test needs Turkish locale') c = codecs.lookup('ASCII') self.assertEqual(c.name, 'ascii') diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index c591fd54430b18..d1e7e69e7e951b 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -1253,7 +1253,7 @@ def test_deprecated_N_format(self): self.assertRaises(ValueError, format, h, '10Nf') self.assertRaises(ValueError, format, h, 'Nx') - @run_with_locale('LC_ALL', 'ps_AF') + @run_with_locale('LC_ALL', 'ps_AF', '') def test_wide_char_separator_decimal_point(self): # locale with wide char separator and decimal point Decimal = self.decimal.Decimal diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 65b9cb03e616d7..6b074d537042f8 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -153,7 +153,7 @@ def check(s): # non-UTF-8 byte string check(b'123\xa0') - @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE') + @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE', '') def test_float_with_comma(self): # set locale to something that doesn't use '.' for the decimal point # float must not accept the locale specific decimal point but diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index b5384b59463742..b448227a0292fc 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -57,7 +57,7 @@ def timevalues(self): timezone(timedelta(0, 2 * 60 * 60))), '"18-May-2033 05:33:20 +0200"'] - @run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '') # DST rules included to work around quirk where the Gnu C library may not # otherwise restore the previous time zone @run_with_tz('STD-1DST,M3.2.0,M11.1.0') diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index e9ed7a2156a715..a4c92a66aa1eb5 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1665,7 +1665,7 @@ def test_startswith_endswith_errors(self): self.assertIn('str', exc) self.assertIn('tuple', exc) - @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR') + @support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR', '') def test_format_float(self): # should not format with a comma, but always with C locale self.assertEqual('1.0', '%.1f' % 1.0) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 038746e26c24ad..37f6b08db28b3e 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -7,7 +7,8 @@ import os import sys from test import support -from test.support import skip_if_buggy_ucrt_strfptime, warnings_helper +from test.support import warnings_helper +from test.support import skip_if_buggy_ucrt_strfptime, run_with_locales from datetime import date as datetime_date import _strptime @@ -207,8 +208,8 @@ class StrptimeTests(unittest.TestCase): """Tests for _strptime.strptime.""" def setUp(self): - """Create testing time tuple.""" - self.time_tuple = time.gmtime() + """Create testing time tuples.""" + self.time_tuple = time.localtime() def test_ValueError(self): # Make sure ValueError is raised when match fails or format is bad @@ -289,54 +290,67 @@ def test_unconverteddata(self): # Check ValueError is raised when there is unconverted data self.assertRaises(ValueError, _strptime._strptime_time, "10 12", "%m") - def helper(self, directive, position): + def roundtrip(self, fmt, position, time_tuple=None): """Helper fxn in testing.""" - fmt = "%d %Y" if directive == 'd' else "%" + directive - strf_output = time.strftime(fmt, self.time_tuple) + if time_tuple is None: + time_tuple = self.time_tuple + strf_output = time.strftime(fmt, time_tuple) strp_output = _strptime._strptime_time(strf_output, fmt) - self.assertTrue(strp_output[position] == self.time_tuple[position], - "testing of '%s' directive failed; '%s' -> %s != %s" % - (directive, strf_output, strp_output[position], - self.time_tuple[position])) + self.assertEqual(strp_output[position], time_tuple[position], + "testing of %r format failed; %r -> %r != %r" % + (fmt, strf_output, strp_output[position], + time_tuple[position])) + if support.verbose >= 3: + print("testing of %r format: %r -> %r" % + (fmt, strf_output, strp_output[position])) def test_year(self): # Test that the year is handled properly - for directive in ('y', 'Y'): - self.helper(directive, 0) + self.roundtrip('%Y', 0) + self.roundtrip('%y', 0) + self.roundtrip('%Y', 0, (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + # Must also make sure %y values are correct for bounds set by Open Group - for century, bounds in ((1900, ('69', '99')), (2000, ('00', '68'))): - for bound in bounds: - strp_output = _strptime._strptime_time(bound, '%y') - expected_result = century + int(bound) - self.assertTrue(strp_output[0] == expected_result, - "'y' test failed; passed in '%s' " - "and returned '%s'" % (bound, strp_output[0])) + strptime = _strptime._strptime_time + self.assertEqual(strptime('00', '%y')[0], 2000) + self.assertEqual(strptime('68', '%y')[0], 2068) + self.assertEqual(strptime('69', '%y')[0], 1969) + self.assertEqual(strptime('99', '%y')[0], 1999) def test_month(self): # Test for month directives - for directive in ('B', 'b', 'm'): - self.helper(directive, 1) + self.roundtrip('%m', 1) + + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') + def test_month_locale(self): + # Test for month directives + self.roundtrip('%B', 1) + self.roundtrip('%b', 1) + for m in range(1, 13): + self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) + self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) def test_day(self): # Test for day directives - self.helper('d', 2) + self.roundtrip('%d %Y', 2) def test_hour(self): # Test hour directives - self.helper('H', 3) - strf_output = time.strftime("%I %p", self.time_tuple) - strp_output = _strptime._strptime_time(strf_output, "%I %p") - self.assertTrue(strp_output[3] == self.time_tuple[3], - "testing of '%%I %%p' directive failed; '%s' -> %s != %s" % - (strf_output, strp_output[3], self.time_tuple[3])) + self.roundtrip('%H', 3) + + # NB: Only works on locales with AM/PM + @run_with_locales('LC_TIME', 'C', 'en_US', 'ja_JP') + def test_hour_locale(self): + # Test hour directives + self.roundtrip('%I %p', 3) def test_minute(self): # Test minute directives - self.helper('M', 4) + self.roundtrip('%M', 4) def test_second(self): # Test second directives - self.helper('S', 5) + self.roundtrip('%S', 5) def test_fraction(self): # Test microseconds @@ -347,12 +361,18 @@ def test_fraction(self): def test_weekday(self): # Test weekday directives - for directive in ('A', 'a', 'w', 'u'): - self.helper(directive,6) + self.roundtrip('%w', 6) + self.roundtrip('%u', 6) + + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', '') + def test_weekday_locale(self): + # Test weekday directives + self.roundtrip('%A', 6) + self.roundtrip('%a', 6) def test_julian(self): # Test julian directives - self.helper('j', 7) + self.roundtrip('%j', 7) def test_offset(self): one_hour = 60 * 60 @@ -449,20 +469,96 @@ def test_bad_timezone(self): "time.daylight set to %s and passing in %s" % (time.tzname, tz_value, time.daylight, tz_name)) - def test_date_time(self): + # NB: Does not roundtrip in some locales due to the ambiguity of + # the date and time representation (bugs in locales?): + # * Seconds are not included: bem_ZM, bokmal, ff_SN, nb_NO, nn_NO, + # no_NO, norwegian, nynorsk. + # * Hours are in 12-hour notation without AM/PM indication: hy_AM, + # id_ID, ms_MY. + # * Year is not included: ha_NG. + # * Use non-Gregorian calendar: lo_LA, thai, th_TH. + # + # BUG: Generates invalid regexp for br_FR, csb_PL, Arabic. + # BUG: Generates regexp that does not match the current date and time + # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. + # BUG: Generates regexp that does not match the current date and time + # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG, + # fr_FR, ja_JP, he_IL, ko_KR, zh_CN, etc. + @run_with_locales('LC_TIME', 'C', 'en_US', 'de_DE', + 'eu_ES', 'mfe_MU') + def test_date_time_locale(self): # Test %c directive - for position in range(6): - self.helper('c', position) - - def test_date(self): + now = time.time() + self.roundtrip('%c', slice(0, 6), time.localtime(now)) + # 1 hour 20 minutes 30 seconds ago + self.roundtrip('%c', slice(0, 6), time.localtime(now - 4830)) + # 12 hours ago + self.roundtrip('%c', slice(0, 6), time.localtime(now - 12*3600)) + # different days of the week + for i in range(1, 7): + self.roundtrip('%c', slice(0, 6), time.localtime(now - i*24*3600)) + # different months + for i in range(1, 12): + self.roundtrip('%c', slice(0, 6), time.localtime(now - i*30*24*3600)) + # different year + self.roundtrip('%c', slice(0, 6), time.localtime(now - 366*24*3600)) + + # NB: Dates before 1969 do not roundtrip on some locales: + # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. + @run_with_locales('LC_TIME', 'C', 'en_US', 'de_DE', 'ja_JP') + def test_date_time_locale2(self): + # Test %c directive + self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + + # NB: Does not roundtrip because use non-Gregorian calendar: + # lo_LA, thai, th_TH. + # BUG: Generates regexp that does not match the current date + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM, + # Arabic, ja_JP, ko_KR, zh_CN, etc. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', + 'he_IL', 'eu_ES') + def test_date_locale(self): # Test %x directive - for position in range(0,3): - self.helper('x', position) - - def test_time(self): + now = time.time() + self.roundtrip('%x', slice(0, 3), time.localtime(now)) + # different days of the week + for i in range(1, 7): + self.roundtrip('%x', slice(0, 3), time.localtime(now - i*24*3600)) + # different months + for i in range(1, 12): + self.roundtrip('%x', slice(0, 3), time.localtime(now - i*30*24*3600)) + # different year + self.roundtrip('%x', slice(0, 3), time.localtime(now - 366*24*3600)) + + # NB: Dates before 1969 do not roundtrip on many locales, including C. + @unittest.skipIf( + support.is_emscripten or support.is_wasi, + "musl libc issue on Emscripten, bpo-46390" + ) + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + def test_date_locale2(self): + # Test %x directive + self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + + # NB: Does not roundtrip in some locales due to the ambiguity of + # the time representation (bugs in locales?): + # * Seconds are not included: bokmal, ff_SN, nb_NO, nn_NO, no_NO, + # norwegian, nynorsk. + # * Hours are in 12-hour notation without AM/PM indication: hy_AM, + # ms_MY, sm_WS. + # BUG: Generates regexp that does not match the current time for + # aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET, + # lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET, + # so_SO, ti_ER, ti_ET, tig_ER, wal_ET. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + def test_time_locale(self): # Test %X directive - for position in range(3,6): - self.helper('X', position) + now = time.time() + self.roundtrip('%X', slice(3, 6), time.localtime(now)) + # 1 hour 20 minutes 30 seconds ago + self.roundtrip('%X', slice(3, 6), time.localtime(now - 4830)) + # 12 hours ago + self.roundtrip('%X', slice(3, 6), time.localtime(now - 12*3600)) def test_percent(self): # Make sure % signs are handled properly @@ -714,12 +810,7 @@ def test_new_localetime(self): def test_TimeRE_recreation_locale(self): # The TimeRE instance should be recreated upon changing the locale. - locale_info = locale.getlocale(locale.LC_TIME) - try: - locale.setlocale(locale.LC_TIME, ('en_US', 'UTF8')) - except locale.Error: - self.skipTest('test needs en_US.UTF8 locale') - try: + with support.run_with_locale('LC_TIME', 'en_US.UTF8'): _strptime._strptime_time('10 2004', '%d %Y') # Get id of current cache object. first_time_re = _strptime._TimeRE_cache @@ -736,10 +827,6 @@ def test_TimeRE_recreation_locale(self): # to the resetting to the original locale. except locale.Error: self.skipTest('test needs de_DE.UTF8 locale') - # Make sure we don't trample on the locale setting once we leave the - # test. - finally: - locale.setlocale(locale.LC_TIME, locale_info) @support.run_with_tz('STD-1DST,M4.1.0,M10.1.0') def test_TimeRE_recreation_timezone(self): diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 530c317a852e77..5b5779231f06ce 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -2,7 +2,6 @@ from test.support import warnings_helper import decimal import enum -import locale import math import platform import sys @@ -600,17 +599,8 @@ def test_get_clock_info(self): class TestLocale(unittest.TestCase): - def setUp(self): - self.oldloc = locale.setlocale(locale.LC_ALL) - - def tearDown(self): - locale.setlocale(locale.LC_ALL, self.oldloc) - + @support.run_with_locale('LC_ALL', 'fr_FR', '') def test_bug_3061(self): - try: - tmp = locale.setlocale(locale.LC_ALL, "fr_FR") - except locale.Error: - self.skipTest('could not set locale.LC_ALL to fr_FR') # This should not cause an exception time.strftime("%B", (2009,2,1,0,0,0,0,0,0)) diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 2844047bd8423e..128b1ae05e91cc 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -399,7 +399,7 @@ def test(i, format_spec, result): test(123456, "1=20", '11111111111111123456') test(123456, "*=20", '**************123456') - @run_with_locale('LC_NUMERIC', 'en_US.UTF8') + @run_with_locale('LC_NUMERIC', 'en_US.UTF8', '') def test_float__format__locale(self): # test locale support for __format__ code 'n' @@ -408,7 +408,7 @@ def test_float__format__locale(self): self.assertEqual(locale.format_string('%g', x, grouping=True), format(x, 'n')) self.assertEqual(locale.format_string('%.10g', x, grouping=True), format(x, '.10n')) - @run_with_locale('LC_NUMERIC', 'en_US.UTF8') + @run_with_locale('LC_NUMERIC', 'en_US.UTF8', '') def test_int__format__locale(self): # test locale support for __format__ code 'n' for integers