From 6d741b1244d3b65620a01d21359cf5478384fdbf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 27 Apr 2020 17:57:59 -0700 Subject: [PATCH 01/12] BUG: preserve freq in DTI/TDI factorize --- pandas/core/algorithms.py | 13 ++++++ pandas/core/arrays/datetimelike.py | 8 ++++ pandas/core/base.py | 12 +++++- pandas/core/indexes/datetimelike.py | 9 ++++ .../tests/indexes/datetimes/test_datetime.py | 42 ++++++++++++------- .../indexes/timedeltas/test_timedelta.py | 3 ++ 6 files changed, 71 insertions(+), 16 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index eca1733b61a52..78b49babb2ab5 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -46,11 +46,15 @@ needs_i8_conversion, ) from pandas.core.dtypes.generic import ( + ABCDatetimeArray, + ABCDatetimeIndex, ABCExtensionArray, ABCIndex, ABCIndexClass, ABCMultiIndex, ABCSeries, + ABCTimedeltaArray, + ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import isna, na_value_for_dtype @@ -614,6 +618,15 @@ def factorize( values = _ensure_arraylike(values) original = values + if isinstance( + values, + (ABCDatetimeIndex, ABCTimedeltaIndex, ABCDatetimeArray, ABCTimedeltaArray), + ): + # Defer to the method in order to retain freq + if sort is False and size_hint is None: + # EA/Index methods dont support the same kwargs as this func + return values.factorize(na_sentinel=na_sentinel) + if is_extension_array_dtype(values.dtype): values = extract_array(values) codes, uniques = values.factorize(na_sentinel=na_sentinel) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c325ec0d0bf7c..c28d3a9db959b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -437,6 +437,14 @@ def _with_freq(self, freq): arr._freq = freq return arr + def factorize(self, na_sentinel=-1): + if self.freq is not None: + # We must be unique, so can short-circuit (and retain freq) + codes = np.arange(len(self)) + # TOOD: copy? + return codes, self[:] + return super().factorize(na_sentinel=na_sentinel) + DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") diff --git a/pandas/core/base.py b/pandas/core/base.py index ee514888c6331..5070f2f0cbd98 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1402,7 +1402,17 @@ def memory_usage(self, deep=False): ), ) def factorize(self, sort=False, na_sentinel=-1): - return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel) + codes, uniques = algorithms.factorize( + self._values, sort=sort, na_sentinel=na_sentinel + ) + if isinstance(self, ABCIndexClass): + # use constructor instead of Index to get MultiIndex right + uniques = self._constructor(uniques) + else: + from pandas import Index + + uniques = Index(uniques) + return codes, uniques _shared_docs[ "searchsorted" diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8295ca13c33b1..ae7fa00b38716 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -682,6 +682,15 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): result._cache = cache return result + def factorize(self, sort=False, na_sentinel=-1): + if self.freq is not None and sort is False: + # we are unique, so can short-circuit, also can preserve freq + codes = np.arange(len(self)) + return codes, self[:] + # TODO: In the sort=True case we could check for montonic_decreasing + # and operate on self[::-1] + return super().factorize(sort=sort, na_sentinel=na_sentinel) + # -------------------------------------------------------------------- # Set Operation Methods diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e0e5beaf48e20..737d1f3313dde 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -328,10 +328,12 @@ def test_factorize(self): arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq arr, idx = idx1.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq # tz must be preserved idx1 = idx1.tz_localize("Asia/Tokyo") @@ -340,6 +342,7 @@ def test_factorize(self): arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq idx2 = pd.DatetimeIndex( ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"] @@ -350,12 +353,14 @@ def test_factorize(self): arr, idx = idx2.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"]) arr, idx = idx2.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq # freq must be preserved idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") @@ -363,8 +368,9 @@ def test_factorize(self): arr, idx = idx3.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq - def test_factorize_tz(self, tz_naive_fixture): + def test_factorize_tz(self, tz_naive_fixture, index_or_series): tz = tz_naive_fixture # GH#13750 base = pd.date_range("2016-11-05", freq="H", periods=100, tz=tz) @@ -372,27 +378,33 @@ def test_factorize_tz(self, tz_naive_fixture): exp_arr = np.arange(100, dtype=np.intp).repeat(5) - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - tm.assert_numpy_array_equal(arr, exp_arr) - expected = base._with_freq(None) - tm.assert_index_equal(res, expected) + obj = index_or_series(idx) - def test_factorize_dst(self): + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + expected = base._with_freq(None) + tm.assert_index_equal(res, expected) + assert res.freq == expected.freq + + def test_factorize_dst(self, index_or_series): # GH 13750 idx = pd.date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern") + obj = index_or_series(idx) - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) - tm.assert_index_equal(res, idx) + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + if index_or_series is Index: + assert res.freq == idx.freq idx = pd.date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern") + obj = index_or_series(idx) - for obj in [idx, pd.Series(idx)]: - arr, res = obj.factorize() - tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) - tm.assert_index_equal(res, idx) + arr, res = obj.factorize() + tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) + tm.assert_index_equal(res, idx) + if index_or_series is Index: + assert res.freq == idx.freq @pytest.mark.parametrize( "arr, expected", diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 5efa1a75700e0..2495250749fe3 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -76,10 +76,12 @@ def test_factorize(self): arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq arr, idx = idx1.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) + assert idx.freq == exp_idx.freq # freq must be preserved idx3 = timedelta_range("1 day", periods=4, freq="s") @@ -87,6 +89,7 @@ def test_factorize(self): arr, idx = idx3.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq def test_sort_values(self): From a553174cf6216a962294e14e9aa96f50fee5fbc5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 27 Apr 2020 19:20:11 -0700 Subject: [PATCH 02/12] mypy fixup --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c28d3a9db959b..181ebd7453982 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -443,7 +443,7 @@ def factorize(self, na_sentinel=-1): codes = np.arange(len(self)) # TOOD: copy? return codes, self[:] - return super().factorize(na_sentinel=na_sentinel) + return ExtensionArray.factorize(self, na_sentinel=na_sentinel) DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") From 23911efbe84c2b4c741f4a865b86991e54227609 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Apr 2020 07:38:43 -0700 Subject: [PATCH 03/12] dummy commit to force CI From 0e51930df0b9c205b46e73b172330f7ba462ae42 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Apr 2020 09:36:09 -0700 Subject: [PATCH 04/12] refactor per joris suggestion --- pandas/core/algorithms.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 78b49babb2ab5..61d87cd0654d5 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -46,15 +46,11 @@ needs_i8_conversion, ) from pandas.core.dtypes.generic import ( - ABCDatetimeArray, - ABCDatetimeIndex, ABCExtensionArray, ABCIndex, ABCIndexClass, ABCMultiIndex, ABCSeries, - ABCTimedeltaArray, - ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import isna, na_value_for_dtype @@ -617,18 +613,11 @@ def factorize( values = _ensure_arraylike(values) original = values + if not isinstance(values, ABCMultiIndex): + values = extract_array(values, extract_numpy=True) - if isinstance( - values, - (ABCDatetimeIndex, ABCTimedeltaIndex, ABCDatetimeArray, ABCTimedeltaArray), - ): - # Defer to the method in order to retain freq - if sort is False and size_hint is None: - # EA/Index methods dont support the same kwargs as this func - return values.factorize(na_sentinel=na_sentinel) - - if is_extension_array_dtype(values.dtype): - values = extract_array(values) + if isinstance(values, ABCExtensionArray): + # Includes DatetimeArray, TimedeltaArray codes, uniques = values.factorize(na_sentinel=na_sentinel) dtype = original.dtype else: From 678251db4cb40306136db9b4b3166acc38c63b6c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Apr 2020 11:01:33 -0700 Subject: [PATCH 05/12] 32bit compat --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/indexes/datetimelike.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 181ebd7453982..3649cc685bb7c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -440,7 +440,7 @@ def _with_freq(self, freq): def factorize(self, na_sentinel=-1): if self.freq is not None: # We must be unique, so can short-circuit (and retain freq) - codes = np.arange(len(self)) + codes = np.arange(len(self), dtype=np.intp) # TOOD: copy? return codes, self[:] return ExtensionArray.factorize(self, na_sentinel=na_sentinel) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 3ba40e3b8b7b4..292e42993bd50 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -605,7 +605,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default): def factorize(self, sort=False, na_sentinel=-1): if self.freq is not None and sort is False: # we are unique, so can short-circuit, also can preserve freq - codes = np.arange(len(self)) + codes = np.arange(len(self), dtype=np.intp) return codes, self[:] # TODO: In the sort=True case we could check for montonic_decreasing # and operate on self[::-1] From abb5913f38d4c70cd972221b5252f68eba7f8192 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Apr 2020 12:19:02 -0700 Subject: [PATCH 06/12] return copy --- pandas/core/arrays/datetimelike.py | 3 +-- pandas/core/base.py | 12 +----------- pandas/core/indexes/datetimelike.py | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3649cc685bb7c..22ffd00ca1393 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -441,8 +441,7 @@ def factorize(self, na_sentinel=-1): if self.freq is not None: # We must be unique, so can short-circuit (and retain freq) codes = np.arange(len(self), dtype=np.intp) - # TOOD: copy? - return codes, self[:] + return codes, self.copy() return ExtensionArray.factorize(self, na_sentinel=na_sentinel) diff --git a/pandas/core/base.py b/pandas/core/base.py index 5070f2f0cbd98..ee514888c6331 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1402,17 +1402,7 @@ def memory_usage(self, deep=False): ), ) def factorize(self, sort=False, na_sentinel=-1): - codes, uniques = algorithms.factorize( - self._values, sort=sort, na_sentinel=na_sentinel - ) - if isinstance(self, ABCIndexClass): - # use constructor instead of Index to get MultiIndex right - uniques = self._constructor(uniques) - else: - from pandas import Index - - uniques = Index(uniques) - return codes, uniques + return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel) _shared_docs[ "searchsorted" diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 292e42993bd50..c6f79de61053e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -606,7 +606,7 @@ def factorize(self, sort=False, na_sentinel=-1): if self.freq is not None and sort is False: # we are unique, so can short-circuit, also can preserve freq codes = np.arange(len(self), dtype=np.intp) - return codes, self[:] + return codes, self.copy() # TODO: In the sort=True case we could check for montonic_decreasing # and operate on self[::-1] return super().factorize(sort=sort, na_sentinel=na_sentinel) From 7c6638956779a594c6645bd183645e8d2eb5bebd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 28 Apr 2020 18:07:39 -0700 Subject: [PATCH 07/12] preserve freq in pd.factorize --- pandas/core/algorithms.py | 10 +++++++++- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/tests/indexes/datetimes/test_datetime.py | 9 ++++++++- pandas/tests/indexes/timedeltas/test_timedelta.py | 8 +++++++- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 61d87cd0654d5..7ac9cef0d2412 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -184,8 +184,16 @@ def _reconstruct_data(values, dtype, original): ------- Index for extension types, otherwise ndarray casted to dtype """ + if isinstance(values, ABCExtensionArray) and values.dtype == dtype: + # Catch DatetimeArray/TimedeltaArray + return values + if is_extension_array_dtype(dtype): - values = dtype.construct_array_type()._from_sequence(values) + cls = dtype.construct_array_type() + if isinstance(values, cls) and values.dtype == dtype: + return values + + values = cls._from_sequence(values) elif is_bool_dtype(dtype): values = values.astype(dtype, copy=False) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e3fbb906ed6b1..222e9d8edd454 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -112,7 +112,7 @@ def f(self): return property(f) -class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps): +class DatetimeArray(dtl.TimelikeOps, dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): """ Pandas ExtensionArray for tz-naive or tz-aware datetime data. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a460d07e1f6f2..cc5ad95036a1c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -57,7 +57,7 @@ def f(self): return property(f) -class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): +class TimedeltaArray(dtl.TimelikeOps, dtl.DatetimeLikeArrayMixin): """ Pandas ExtensionArray for timedelta data. diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 737d1f3313dde..aa3a963efb735 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -362,14 +362,21 @@ def test_factorize(self): tm.assert_index_equal(idx, exp_idx) assert idx.freq == exp_idx.freq - # freq must be preserved + def test_factorize_preserves_freq(self): + # GH#33836 freq should be preserved idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) assert idx.freq == idx3.freq + arr, idx = pd.factorize(idx3) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + def test_factorize_tz(self, tz_naive_fixture, index_or_series): tz = tz_naive_fixture # GH#13750 diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 2495250749fe3..fa82ccb68989b 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -83,7 +83,8 @@ def test_factorize(self): tm.assert_index_equal(idx, exp_idx) assert idx.freq == exp_idx.freq - # freq must be preserved + def test_factorize_preserves_freq(self): + # GH#33836 freq should be preserved idx3 = timedelta_range("1 day", periods=4, freq="s") exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() @@ -91,6 +92,11 @@ def test_factorize(self): tm.assert_index_equal(idx, idx3) assert idx.freq == idx3.freq + arr, idx = pd.factorize(idx3) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + assert idx.freq == idx3.freq + def test_sort_values(self): idx = TimedeltaIndex(["4d", "1d", "2d"]) From 4a5b4ac86c55ef47b7b2b310676813c8cbb0eaed Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 27 Nov 2020 12:12:20 -0800 Subject: [PATCH 08/12] revert unnecessary --- pandas/core/arrays/datetimelike.py | 1 + pandas/core/indexes/datetimelike.py | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 937fe7d298e60..5e8facecfdeb6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1656,6 +1656,7 @@ def factorize(self, na_sentinel=-1, sort: bool = False): codes = codes[::-1] uniques = uniques[::-1] return codes, uniques + # FIXME: shouldn't get here; we are ignoring sort return super().factorize(na_sentinel=na_sentinel) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 39f857ed5a2b8..1b18f04ba603d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -649,15 +649,6 @@ def _has_complex_internals(self) -> bool: def is_type_compatible(self, kind: str) -> bool: return kind in self._data._infer_matches - def factorize(self, sort=False, na_sentinel=-1): - if self.freq is not None and sort is False: - # we are unique, so can short-circuit, also can preserve freq - codes = np.arange(len(self), dtype=np.intp) - return codes, self.copy() - # TODO: In the sort=True case we could check for montonic_decreasing - # and operate on self[::-1] - return super().factorize(sort=sort, na_sentinel=na_sentinel) - # -------------------------------------------------------------------- # Set Operation Methods From b8b99cbcbf159db4232ba57e254a8e814236e3b9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 28 Nov 2020 17:36:15 +0000 Subject: [PATCH 09/12] ignore mypy error --- pandas/core/arrays/datetimelike.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5e8facecfdeb6..979ca70bad9b3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1654,7 +1654,11 @@ def factorize(self, na_sentinel=-1, sort: bool = False): uniques = self.copy() # TODO: copy or view? if sort and self.freq.n < 0: codes = codes[::-1] - uniques = uniques[::-1] + # TODO: overload __getitem__, a slice indexer returns same type as self + # error: Incompatible types in assignment (expression has type + # "Union[DatetimeLikeArrayMixin, Union[Any, Any]]", variable + # has type "TimelikeOps") [assignment] + uniques = uniques[::-1] # type: ignore[assignment] return codes, uniques # FIXME: shouldn't get here; we are ignoring sort return super().factorize(na_sentinel=na_sentinel) From 3a2dfc70a3b80f515e8674a913a434e3a19c8c2a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 28 Nov 2020 18:17:15 +0000 Subject: [PATCH 10/12] add test --- pandas/tests/indexing/multiindex/test_multiindex.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index a3b8d66c92024..9a3039c28416c 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -83,3 +83,13 @@ def test_nested_tuples_duplicates(self): df3 = df.copy(deep=True) df3.loc[[(dti[0], "a")], "c2"] = 1.0 tm.assert_frame_equal(df3, expected) + + def test_multiindex_with_datatime_level_preserves_freq(self): + # https://github.com/pandas-dev/pandas/issues/35563 + idx = Index(range(2), name="A") + dti = pd.date_range("2020-01-01", periods=7, freq="D", name="B") + mi = MultiIndex.from_product([idx, dti]) + df = DataFrame(np.random.randn(14, 2), index=mi) + result = df.loc[0].index + tm.assert_index_equal(result, dti) + assert result.freq == dti.freq From 82db4ad1e98714c52e51737ff313c40bd94f7011 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 28 Nov 2020 18:18:26 +0000 Subject: [PATCH 11/12] update issue numbers --- pandas/tests/indexes/datetimes/test_datetime.py | 2 +- pandas/tests/indexes/timedeltas/test_timedelta.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index bd20a9a512cd2..789510b452969 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -300,7 +300,7 @@ def test_factorize(self): assert idx.freq == exp_idx.freq def test_factorize_preserves_freq(self): - # GH#33836 freq should be preserved + # GH#38120 freq should be preserved idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index f7618ccc44122..f0e730eecf3d5 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -83,7 +83,7 @@ def test_factorize(self): assert idx.freq == exp_idx.freq def test_factorize_preserves_freq(self): - # GH#33836 freq should be preserved + # GH#38120 freq should be preserved idx3 = timedelta_range("1 day", periods=4, freq="s") exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() From a90778139ae20709eaede217156b928460bf301f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 29 Nov 2020 18:32:49 +0000 Subject: [PATCH 12/12] add whatsnew --- doc/source/whatsnew/v1.1.5.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 46c4ad4f35fe4..22f2303062124 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` and :meth:`Series.loc` for ``__setitem__`` when one-dimensional tuple was given to select from :class:`MultiIndex` (:issue:`37711`) - Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) - Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`) +- Fixed regression in :class:`MultiIndex` constructed from a :class:`DatetimeIndex` not retaining frequency (:issue:`35563`) - Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) - Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`) - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)