pandas-dev · jreback · Nov 30, 2020 · Apr 28, 2020 · Apr 28, 2020 · Apr 28, 2020
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -48,11 +48,13 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.generic import (
+    ABCDatetimeArray,
     ABCExtensionArray,
     ABCIndexClass,
     ABCMultiIndex,
     ABCRangeIndex,
     ABCSeries,
+    ABCTimedeltaArray,
 )
 from pandas.core.dtypes.missing import isna, na_value_for_dtype
 
@@ -199,8 +201,16 @@ def _reconstruct_data(
     -------
     ExtensionArray or np.ndarray
     """
+    if isinstance(values, ABCExtensionArray) and values.dtype == dtype:
+        # Catch DatetimeArray/TimedeltaArray
+        return values
+
     if is_extension_array_dtype(dtype):
-        values = dtype.construct_array_type()._from_sequence(values)
+        cls = dtype.construct_array_type()
+        if isinstance(values, cls) and values.dtype == dtype:
+            return values
+
+        values = cls._from_sequence(values)
     elif is_bool_dtype(dtype):
         values = values.astype(dtype, copy=False)
 
@@ -674,8 +684,13 @@ def factorize(
     # responsible only for factorization. All data coercion, sorting and boxing
     # should happen here.
 
+    if isinstance(values, ABCRangeIndex):
+        return values.factorize(sort=sort)
+
     values = _ensure_arraylike(values)
     original = values
+    if not isinstance(values, ABCMultiIndex):
+        values = extract_array(values, extract_numpy=True)
 
     # GH35667, if na_sentinel=None, we will not dropna NaNs from the uniques
     # of values, assign na_sentinel=-1 to replace code value for NaN.
@@ -684,10 +699,20 @@ def factorize(
         na_sentinel = -1
         dropna = False
 
-    if isinstance(values, ABCRangeIndex):
-        return values.factorize(sort=sort)
-    elif is_extension_array_dtype(values.dtype):
-        values = extract_array(values)
+    if (
+        isinstance(values, (ABCDatetimeArray, ABCTimedeltaArray))
+        and values.freq is not None
+    ):
+        codes, uniques = values.factorize(sort=sort)
+        if isinstance(original, ABCIndexClass):
+            uniques = original._shallow_copy(uniques, name=None)
+        elif isinstance(original, ABCSeries):
+            from pandas import Index
+
+            uniques = Index(uniques)
+        return codes, uniques
+
+    if is_extension_array_dtype(values.dtype):
         codes, uniques = values.factorize(na_sentinel=na_sentinel)
         dtype = original.dtype
     else:

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -1645,6 +1645,24 @@ def _with_freq(self, freq):
         arr._freq = freq
         return arr
 
+    # --------------------------------------------------------------
+
+    def factorize(self, na_sentinel=-1, sort: bool = False):
+        """Encode as (codes, uniques), keeping ``freq`` on uniques when set."""
+        if self.freq is None:
+            # FIXME: shouldn't get here; we are ignoring sort
+            return super().factorize(na_sentinel=na_sentinel)
+
+        # We must be unique, so every element is its own code (and freq is kept)
+        positions = np.arange(len(self), dtype=np.intp)
+        result = self.copy()  # TODO: copy or view?
+        if sort and self.freq.n < 0:
+            # Negative freq multiple: reverse codes and uniques together.
+            # TODO: overload __getitem__, a slice indexer returns same type as self
+            positions = positions[::-1]
+            result = result[::-1]  # type: ignore[assignment]
+        return positions, result
+
 
 # -------------------------------------------------------------------
 # Shared Constructor Helpers

diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -265,10 +265,12 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         arr, idx = idx1.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         # tz must be preserved
         idx1 = idx1.tz_localize("Asia/Tokyo")
@@ -277,6 +279,7 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         idx2 = DatetimeIndex(
             ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
@@ -287,49 +290,65 @@ def test_factorize(self):
         arr, idx = idx2.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
         exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
         arr, idx = idx2.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
-        # freq must be preserved
+    def test_factorize_preserves_freq(self):
+        # GH#38120: both idx.factorize() and pd.factorize(idx) should preserve freq
         idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
         exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
+
         arr, idx = idx3.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = pd.factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
 
-    def test_factorize_tz(self, tz_naive_fixture):
+    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
         tz = tz_naive_fixture
         # GH#13750
         base = date_range("2016-11-05", freq="H", periods=100, tz=tz)
         idx = base.repeat(5)
 
         exp_arr = np.arange(100, dtype=np.intp).repeat(5)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, exp_arr)
-            expected = base._with_freq(None)
-            tm.assert_index_equal(res, expected)
+        obj = index_or_series(idx)  # box idx with the fixture-supplied constructor
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        expected = base._with_freq(None)
+        tm.assert_index_equal(res, expected)
+        assert res.freq == expected.freq  # expected.freq comes from _with_freq(None)
 
-    def test_factorize_dst(self):
+    def test_factorize_dst(self, index_or_series):
         # GH 13750
         idx = date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-            tm.assert_index_equal(res, idx)
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:  # freq is only asserted for Index input
+            assert res.freq == idx.freq
 
         idx = date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-            tm.assert_index_equal(res, idx)
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:  # freq is only asserted for Index input
+            assert res.freq == idx.freq
 
     @pytest.mark.parametrize(
         "arr, expected",

diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -75,17 +75,26 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         arr, idx = idx1.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
-        # freq must be preserved
+    def test_factorize_preserves_freq(self):
+        # GH#38120: both idx.factorize() and pd.factorize(idx) should preserve freq
         idx3 = timedelta_range("1 day", periods=4, freq="s")
         exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
         arr, idx = idx3.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = pd.factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
 
     def test_sort_values(self):
 

diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -83,3 +83,13 @@ def test_nested_tuples_duplicates(self):
         df3 = df.copy(deep=True)
         df3.loc[[(dti[0], "a")], "c2"] = 1.0
         tm.assert_frame_equal(df3, expected)
+
+    def test_multiindex_with_datatime_level_preserves_freq(self):
+        # GH#35563: df.loc[key] on the outer level keeps the inner level's freq
+        outer = Index(range(2), name="A")
+        inner = pd.date_range("2020-01-01", periods=7, freq="D", name="B")
+        product = MultiIndex.from_product([outer, inner])
+        frame = DataFrame(np.random.randn(14, 2), index=product)
+        sliced_index = frame.loc[0].index
+        tm.assert_index_equal(sliced_index, inner)
+        assert sliced_index.freq == inner.freq
diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py
@@ -11,7 +11,6 @@ def test_ewm_pairwise_cov_corr(func, frame):
     result = result.loc[(slice(None), 1), 5]
     result.index = result.index.droplevel(1)
     expected = getattr(frame[1].ewm(span=10, min_periods=5), func)(frame[5])
-    expected.index = expected.index._with_freq(None)
     tm.assert_series_equal(result, expected, check_names=False)
 
 

diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py
@@ -51,7 +51,6 @@ def test_rolling_pairwise_cov_corr(func, frame):
     result = result.loc[(slice(None), 1), 5]
     result.index = result.index.droplevel(1)
     expected = getattr(frame[1].rolling(window=10, min_periods=5), func)(frame[5])
-    expected.index = expected.index._with_freq(None)
     tm.assert_series_equal(result, expected, check_names=False)