From 96027152e94e0a91b114821480e94104246aaa02 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 7 Dec 2023 09:03:36 -0500
Subject: [PATCH 01/14] Support `freq` in DatetimeIndex

---
 python/cudf/cudf/core/index.py           | 41 ++++++++++++++++++++----
 python/cudf/cudf/core/tools/datetimes.py |  6 +---
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 98d537b2a0f..7a9fed86580 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1433,9 +1433,10 @@ def __repr__(self):
         if self.name is not None:
             lines[-1] = lines[-1] + ", name='%s'" % self.name
         if "length" in tmp_meta:
-            lines[-1] = lines[-1] + ", length=%d)" % len(self)
-        else:
-            lines[-1] = lines[-1] + ")"
+            lines[-1] = lines[-1] + ", length=%d" % len(self)
+        if "freq" in tmp_meta and self._freq is not None:
+            lines[-1] = lines[-1] + f", freq={self._freq}"
+        lines[-1] = lines[-1] + ")"
 
         return "\n".join(lines)
 
@@ -2127,8 +2128,6 @@ def __init__(
         # pandas dtindex creation first which.  For now
         # just make sure we handle np.datetime64 arrays
         # and then just dispatch upstream
-        if freq is not None:
-            raise NotImplementedError("Freq is not yet supported")
         if tz is not None:
             raise NotImplementedError("tz is not yet supported")
         if normalize is not False:
@@ -2142,6 +2141,8 @@ def __init__(
         if yearfirst is not False:
             raise NotImplementedError("yearfirst == True is not yet supported")
 
+        self._freq = _validate_freq(freq)
+
         valid_dtypes = tuple(
             f"datetime64[{res}]" for res in ("s", "ms", "us", "ns")
         )
@@ -2159,6 +2160,19 @@ def __init__(
 
         super().__init__(data, **kwargs)
 
+        if self._freq is not None:
+            unique_vals = self[1:] - self[:-1]
+            if len(unique_vals) != 1 or unique_vals[0] != self._freq:
+                raise ValueError()
+
+    @classmethod
+    def _from_data(
+        cls, data: MutableMapping, name: Any = no_default, freq: Any = None
+    ):
+        result = super()._from_data(data, name)
+        result._freq = _validate_freq(freq)
+        return result
+
     def __getitem__(self, index):
         value = super().__getitem__(index)
         if cudf.get_option("mode.pandas_compatible") and isinstance(
@@ -2520,7 +2534,13 @@ def to_pandas(self, *, nullable: bool = False) -> pd.DatetimeIndex:
             )
         else:
             nanos = self._values.astype("datetime64[ns]")
-        return pd.DatetimeIndex(nanos.to_pandas(), name=self.name)
+
+        freq = (
+            self._freq._maybe_as_fast_pandas_offset()
+            if self._freq is not None
+            else None
+        )
+        return pd.DatetimeIndex(nanos.to_pandas(), name=self.name, freq=freq)
 
     @_cudf_nvtx_annotate
     def _get_dt_field(self, field):
@@ -3625,3 +3645,12 @@ def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]:
         old_s, s = s, old_s - quotient * s
         old_t, t = t, old_t - quotient * t
     return old_r, old_s, old_t
+
+
+def _validate_freq(freq: Any) -> cudf.DateOffset:
+    if isinstance(freq, str):
+        return cudf.DateOffset._from_freqstr(freq)
+    elif freq is not None:
+        if not isinstance(freq, cudf.DateOffset):
+            raise ValueError(f"Invalid frequency: {freq}")
+    return freq
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 14b27e179a2..4ac15773fbf 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -843,10 +843,6 @@ def date_range(
         arr = cp.linspace(start=start, stop=end, num=periods)
         result = cudf.core.column.as_column(arr).astype("datetime64[ns]")
         return cudf.DatetimeIndex._from_data({name: result})
-    elif cudf.get_option("mode.pandas_compatible"):
-        raise NotImplementedError(
-            "`DatetimeIndex` with `freq` cannot be constructed."
-        )
 
     # The code logic below assumes `freq` is defined. It is first normalized
     # into `DateOffset` for further computation with timestamps.
@@ -940,7 +936,7 @@ def date_range(
         arr = cp.arange(start=start, stop=stop, step=step, dtype="int64")
         res = cudf.core.column.as_column(arr).astype("datetime64[ns]")
 
-    return cudf.DatetimeIndex._from_data({name: res})
+    return cudf.DatetimeIndex._from_data({name: res}, freq=freq)
 
 
 def _has_fixed_frequency(freq: DateOffset) -> bool:

From 98e5e1ee20be9cb12938377d6b3a788fd28bf313 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 7 Dec 2023 11:12:43 -0500
Subject: [PATCH 02/14] "T" is minutes

---
 python/cudf/cudf/core/tools/datetimes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 4ac15773fbf..0e6b2ad35c9 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -469,6 +469,7 @@ class DateOffset:
         "L": "milliseconds",
         "s": "seconds",
         "m": "minutes",
+        "T": "minutes",
         "h": "hours",
         "D": "days",
         "W": "weeks",

From 6b0beee7336c96d201fba6fdc08ce551ba85525c Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 7 Dec 2023 11:23:57 -0500
Subject: [PATCH 03/14] Add more string aliases

---
 python/cudf/cudf/core/tools/datetimes.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 0e6b2ad35c9..9030beeea3f 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -468,9 +468,12 @@ class DateOffset:
         "ms": "milliseconds",
         "L": "milliseconds",
         "s": "seconds",
+        "S": "seconds",
         "m": "minutes",
+        "min": "minutes",
         "T": "minutes",
         "h": "hours",
+        "H": "hours",
         "D": "days",
         "W": "weeks",
         "M": "months",

From 20ca2bb41db31cac3f803ae053d3de7f12ca4492 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 7 Dec 2023 12:20:54 -0500
Subject: [PATCH 04/14] Define resamplers

---
 python/cudf/cudf/pandas/_wrappers/pandas.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py
index 193ef404a8c..c50e72b4b12 100644
--- a/python/cudf/cudf/pandas/_wrappers/pandas.py
+++ b/python/cudf/cudf/pandas/_wrappers/pandas.py
@@ -707,6 +707,14 @@ def Index__new__(cls, *args, **kwargs):
     "Resampler", cudf.core.resample._Resampler, pd_Resampler
 )
 
+DataFrameResampler = make_intermediate_proxy_type(
+    "DataFrameResampler", cudf.core.resample.DataFrameResampler, pd_Resampler
+)
+
+SeriesResampler = make_intermediate_proxy_type(
+    "SeriesResampler", cudf.core.resample.SeriesResampler, pd_Resampler
+)
+
 StataReader = make_intermediate_proxy_type(
     "StataReader",
     _Unusable,

From b461ecbc7bf96ed72900f330c5995e82b6446e6c Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 7 Dec 2023 17:33:32 +0000
Subject: [PATCH 05/14] Carry freq through Index.copy via _copy_type_metadata

---
 python/cudf/cudf/core/index.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 7a9fed86580..7d852206a87 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1228,7 +1228,8 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
         name = self.name if name is None else name
 
         col = self._values.astype(dtype)
-        return _index_from_data({name: col.copy(True) if deep else col})
+        idx_copy = _index_from_data({name: col.copy(True) if deep else col})
+        return idx_copy._copy_type_metadata(self)
 
     @_cudf_nvtx_annotate
     def astype(self, dtype, copy: bool = True):
@@ -2165,6 +2166,14 @@ def __init__(
             if len(unique_vals) != 1 or unique_vals[0] != self._freq:
                 raise ValueError()
 
+    @_cudf_nvtx_annotate
+    def _copy_type_metadata(
+        self: DatetimeIndex, other: DatetimeIndex, *, override_dtypes=None
+    ) -> GenericIndex:
+        super()._copy_type_metadata(other, override_dtypes=override_dtypes)
+        self._freq = other._freq
+        return self
+
     @classmethod
     def _from_data(
         cls, data: MutableMapping, name: Any = no_default, freq: Any = None

From 03378408afa33c6a4544782c153e1fa0227839ec Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 7 Dec 2023 20:29:13 +0000
Subject: [PATCH 06/14] Override DatetimeIndex.copy to keep freq; add N/U and signed freq strings

---
 python/cudf/cudf/core/index.py           | 8 ++++++--
 python/cudf/cudf/core/tools/datetimes.py | 4 +++-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 7d852206a87..dc263512dcc 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1228,8 +1228,7 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
         name = self.name if name is None else name
 
         col = self._values.astype(dtype)
-        idx_copy = _index_from_data({name: col.copy(True) if deep else col})
-        return idx_copy._copy_type_metadata(self)
+        return _index_from_data({name: col.copy(True) if deep else col})
 
     @_cudf_nvtx_annotate
     def astype(self, dtype, copy: bool = True):
@@ -2190,6 +2189,11 @@ def __getitem__(self, index):
             return pd.Timestamp(value)
         return value
 
+    @_cudf_nvtx_annotate
+    def copy(self, name=None, deep=False, dtype=None, names=None):
+        idx_copy = super().copy(name=name, deep=deep, dtype=dtype, names=names)
+        return idx_copy._copy_type_metadata(self)
+
     def searchsorted(
         self,
         value,
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 9030beeea3f..6ec9dcb5f44 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -463,7 +463,9 @@ class DateOffset:
     }
 
     _CODES_TO_UNITS = {
+        "N": "nanoseconds",
         "ns": "nanoseconds",
+        "U": "microseconds",
         "us": "microseconds",
         "ms": "milliseconds",
         "L": "milliseconds",
@@ -491,7 +493,7 @@ class DateOffset:
         pd_offset.Nano: "nanoseconds",
     }
 
-    _FREQSTR_REGEX = re.compile("([0-9]*)([a-zA-Z]+)")
+    _FREQSTR_REGEX = re.compile("([-+]?[0-9]*)([a-zA-Z]+)")
 
     def __init__(self, n=1, normalize=False, **kwds):
         if normalize:

From 957c7c5ea7b57f03f9dfe13126b465989c9efc75 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 7 Dec 2023 14:30:56 -0600
Subject: [PATCH 07/14] Apply suggestions from code review

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 python/cudf/cudf/core/index.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index dc263512dcc..9d50215a5bc 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -3663,7 +3663,6 @@ def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]:
 def _validate_freq(freq: Any) -> cudf.DateOffset:
     if isinstance(freq, str):
         return cudf.DateOffset._from_freqstr(freq)
-    elif freq is not None:
-        if not isinstance(freq, cudf.DateOffset):
-            raise ValueError(f"Invalid frequency: {freq}")
+    elif freq is not None and not isinstance(freq, cudf.DateOffset):
+        raise ValueError(f"Invalid frequency: {freq}")
     return freq

From ed3ba3ff17cf686d1e6e38f01073d27b1be64799 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 7 Dec 2023 21:51:59 +0000
Subject: [PATCH 08/14] Limit freq repr to DatetimeIndex and update date_range tests

---
 python/cudf/cudf/core/index.py          |  9 ++-
 python/cudf/cudf/tests/test_datetime.py | 86 ++++++++++++++++++++++++-
 2 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 9d50215a5bc..15103d827ef 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -17,6 +17,7 @@
     Tuple,
     Type,
     Union,
+    cast,
 )
 
 import cupy
@@ -1434,7 +1435,11 @@ def __repr__(self):
             lines[-1] = lines[-1] + ", name='%s'" % self.name
         if "length" in tmp_meta:
             lines[-1] = lines[-1] + ", length=%d" % len(self)
-        if "freq" in tmp_meta and self._freq is not None:
+        if (
+            "freq" in tmp_meta
+            and isinstance(self, DatetimeIndex)
+            and self._freq is not None
+        ):
             lines[-1] = lines[-1] + f", freq={self._freq}"
         lines[-1] = lines[-1] + ")"
 
@@ -3665,4 +3670,4 @@ def _validate_freq(freq: Any) -> cudf.DateOffset:
         return cudf.DateOffset._from_freqstr(freq)
     elif freq is not None and not isinstance(freq, cudf.DateOffset):
         raise ValueError(f"Invalid frequency: {freq}")
-    return freq
+    return cast(cudf.DateOffset, freq)
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index ee0985a54dd..5b509e7ba86 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -1571,6 +1571,44 @@ def test_date_range_start_end_freq(request, start, end, freq):
             reason="https://github.com/rapidsai/cudf/issues/12133",
         )
     )
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                isinstance(freq, dict)
+                and freq.get("hours", None) == 10
+                and freq.get("days", None) == 57
+                and freq.get("nanoseconds", None) == 3
+                and (
+                    (
+                        start == "1996-11-21 04:05:30"
+                        and end == "2000-02-13 08:41:06"
+                    )
+                    or (
+                        start == "1970-01-01 00:00:00"
+                        and end == "2000-02-13 08:41:06"
+                    )
+                    or (
+                        start == "1970-01-01 00:00:00"
+                        and end == "1996-11-21 04:05:30"
+                    )
+                    or (
+                        start == "1831-05-08 15:23:21"
+                        and end == "2000-02-13 08:41:06"
+                    )
+                    or (
+                        start == "1831-05-08 15:23:21"
+                        and end == "1996-11-21 04:05:30"
+                    )
+                    or (
+                        start == "1831-05-08 15:23:21"
+                        and end == "1970-01-01 00:00:00"
+                    )
+                )
+            ),
+            reason="Nanosecond offsets being dropped by pandas, which is "
+            "fixed in pandas-2.0+",
+        )
+    )
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
     else:
@@ -1586,7 +1624,29 @@ def test_date_range_start_end_freq(request, start, end, freq):
     )
 
 
-def test_date_range_start_freq_periods(start, freq, periods):
+def test_date_range_start_freq_periods(request, start, freq, periods):
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                isinstance(freq, dict)
+                and freq.get("hours", None) == 10
+                and freq.get("days", None) == 57
+                and freq.get("nanoseconds", None) == 3
+                and periods in (10, 100)
+                and (
+                    start
+                    in {
+                        "2000-02-13 08:41:06",
+                        "1996-11-21 04:05:30",
+                        "1970-01-01 00:00:00",
+                        "1831-05-08 15:23:21",
+                    }
+                )
+            ),
+            reason="Nanosecond offsets being dropped by pandas, which is "
+            "fixed in pandas-2.0+",
+        )
+    )
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
     else:
@@ -1613,6 +1673,28 @@ def test_date_range_end_freq_periods(request, end, freq, periods):
             reason="https://github.com/pandas-dev/pandas/issues/46877",
         )
     )
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(
+                isinstance(freq, dict)
+                and freq.get("hours", None) == 10
+                and freq.get("days", None) == 57
+                and freq.get("nanoseconds", None) == 3
+                and periods in (10, 100)
+                and (
+                    end
+                    in {
+                        "2000-02-13 08:41:06",
+                        "1996-11-21 04:05:30",
+                        "1970-01-01 00:00:00",
+                        "1831-05-08 15:23:21",
+                    }
+                )
+            ),
+            reason="Nanosecond offsets being dropped by pandas, which is "
+            "fixed in pandas-2.0+",
+        )
+    )
     if isinstance(freq, str):
         _gfreq = _pfreq = freq
     else:
@@ -2163,8 +2245,6 @@ def test_datetime_getitem_na():
 
 def test_daterange_pandas_compatibility():
     with cudf.option_context("mode.pandas_compatible", True):
-        with pytest.raises(NotImplementedError):
-            cudf.date_range("20010101", "20020215", freq="400h", name="times")
         expected = pd.date_range(
             "2010-01-01", "2010-02-01", periods=10, name="times"
         )

From ce4f3bdffb3eb6400d415aa8e16bdf0a0ccc11a3 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 8 Dec 2023 05:17:10 +0000
Subject: [PATCH 09/14] Use diff().unique() in the DatetimeIndex freq check

---
 python/cudf/cudf/core/index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 15103d827ef..3f791cb6b47 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2166,7 +2166,7 @@ def __init__(
         super().__init__(data, **kwargs)
 
         if self._freq is not None:
-            unique_vals = self[1:] - self[:-1]
+            unique_vals = self.diff().unique()
             if len(unique_vals) != 1 or unique_vals[0] != self._freq:
                 raise ValueError()
 

From cd00345fb6a995c42e0c7827a59f29224f73bc2d Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Fri, 8 Dec 2023 08:37:52 -0600
Subject: [PATCH 10/14] Apply suggestions from code review

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 python/cudf/cudf/core/index.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index d5a59b92f6e..c8883e01b5d 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1431,16 +1431,16 @@ def __repr__(self):
         lines = lines[:-1]
         lines.append(prior_to_dtype + " dtype='%s'" % self.dtype)
         if self.name is not None:
-            lines[-1] = lines[-1] + ", name='%s'" % self.name
+            lines[-1] += f", name='{self.name}'"
         if "length" in tmp_meta:
-            lines[-1] = lines[-1] + ", length=%d" % len(self)
+            lines[-1] += f", length={len(self)}"
         if (
             "freq" in tmp_meta
             and isinstance(self, DatetimeIndex)
             and self._freq is not None
         ):
-            lines[-1] = lines[-1] + f", freq={self._freq}"
-        lines[-1] = lines[-1] + ")"
+            lines[-1] += f", freq={self._freq}"
+        lines[-1] += ")"
 
         return "\n".join(lines)
 

From 6d00347a78af681e9e73fc8fbdc92acc4da02799 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 8 Dec 2023 15:56:03 +0000
Subject: [PATCH 11/14] Fix DatetimeIndex freq consistency check and add freq tests

---
 python/cudf/cudf/core/index.py          |  9 ++++--
 python/cudf/cudf/tests/test_datetime.py | 42 ++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 3f791cb6b47..23ab41a55a1 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2166,9 +2166,12 @@ def __init__(
         super().__init__(data, **kwargs)
 
         if self._freq is not None:
-            unique_vals = self.diff().unique()
-            if len(unique_vals) != 1 or unique_vals[0] != self._freq:
-                raise ValueError()
+            unique_vals = self.to_series().diff().unique()
+            if len(unique_vals) > 2 or (
+                len(unique_vals) == 2
+                and unique_vals[1] != self._freq._maybe_as_fast_pandas_offset()
+            ):
+                raise ValueError("No unique frequency found")
 
     @_cudf_nvtx_annotate
     def _copy_type_metadata(
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 5b509e7ba86..07c8c407ab9 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -12,7 +12,7 @@
 import cudf
 import cudf.testing.dataset_generator as dataset_generator
 from cudf import DataFrame, Series
-from cudf.core._compat import PANDAS_GE_150, PANDAS_LT_140
+from cudf.core._compat import PANDAS_GE_150, PANDAS_GE_200, PANDAS_LT_140
 from cudf.core.index import DatetimeIndex
 from cudf.testing._utils import (
     DATETIME_TYPES,
@@ -2254,6 +2254,46 @@ def test_daterange_pandas_compatibility():
     assert_eq(expected, actual)
 
 
+@pytest.mark.parametrize(
+    "data,dtype,freq",
+    [
+        ([10], "datetime64[ns]", "2N"),
+        ([10, 12, 14, 16], "datetime64[ns]", "2N"),
+        ([10, 11, 12, 13], "datetime64[ns]", "1N"),
+        ([100, 200, 300, 400], "datetime64[s]", "100s"),
+        ([101, 201, 301, 401], "datetime64[ms]", "100ms"),
+    ],
+)
+def test_datetime_index_with_freq(request, data, dtype, freq):
+    request.applymarker(
+        pytest.mark.xfail(
+            condition=(not PANDAS_GE_200 and dtype != "datetime64[ns]"),
+            reason="Pandas < 2.0 lacks non-nano-second dtype support.",
+        )
+    )
+    actual = cudf.DatetimeIndex(data, dtype=dtype, freq=freq)
+    expected = pd.DatetimeIndex(data, dtype=dtype, freq=freq)
+    assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize(
+    "data,dtype,freq",
+    [
+        ([10, 1232, 13244, 13426], "datetime64[ns]", "2N"),
+        ([10, 11, 12, 13], "datetime64[ns]", "1s"),
+        ([10000, 200, 300, 400], "datetime64[s]", "100s"),
+        ([107871, 201, 301, 401], "datetime64[ms]", "100ns"),
+    ],
+)
+def test_datetime_index_freq_error(data, dtype, freq):
+    assert_exceptions_equal(
+        pd.DatetimeIndex,
+        cudf.DatetimeIndex,
+        ([data], {"dtype": dtype, "freq": freq}),
+        ([data], {"dtype": dtype, "freq": freq}),
+    )
+
+
 def test_strings_with_utc_offset_not_implemented():
     with pytest.warns(DeprecationWarning, match="parsing timezone"):  # cupy
         with pytest.raises(NotImplementedError):

From 55266cdfa91c6022a720d4c7cbeb292cc3cda004 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 8 Dec 2023 16:01:15 +0000
Subject: [PATCH 12/14] Validate freq copied in DatetimeIndex._copy_type_metadata

---
 python/cudf/cudf/core/index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 95d1c762184..931cab0e20a 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2177,7 +2177,7 @@ def _copy_type_metadata(
         self: DatetimeIndex, other: DatetimeIndex, *, override_dtypes=None
     ) -> GenericIndex:
         super()._copy_type_metadata(other, override_dtypes=override_dtypes)
-        self._freq = other._freq
+        self._freq = _validate_freq(other._freq)
         return self
 
     @classmethod

From e1b697f92de3ba5d0f062875f5c314463cefe3fc Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 8 Dec 2023 16:24:49 +0000
Subject: [PATCH 13/14] Simplify repr

---
 python/cudf/cudf/core/index.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 931cab0e20a..8fb617cd5b8 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1429,19 +1429,19 @@ def __repr__(self):
         dtype_index = tmp_meta.rfind(" dtype=")
         prior_to_dtype = tmp_meta[:dtype_index]
         lines = lines[:-1]
-        lines.append(prior_to_dtype + " dtype='%s'" % self.dtype)
+        keywords = [f"dtype='{self.dtype}'"]
         if self.name is not None:
-            lines[-1] += f", name='{self.name}'"
+            keywords.append(f"name={self.name!r}")
         if "length" in tmp_meta:
-            lines[-1] += f", length={len(self)}"
+            keywords.append(f"length={len(self)}")
         if (
             "freq" in tmp_meta
             and isinstance(self, DatetimeIndex)
             and self._freq is not None
         ):
-            lines[-1] += f", freq={self._freq}"
-        lines[-1] += ")"
-
+            keywords.append(f"freq={self._freq}")
+        keywords = ", ".join(keywords)
+        lines.append(f"{prior_to_dtype} {keywords})")
         return "\n".join(lines)
 
     @_cudf_nvtx_annotate

From 0d5c452a6bc84b9f63aa1390449cbcf0d0ba3aad Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 12 Dec 2023 02:26:23 +0000
Subject: [PATCH 14/14] Handle freq in groupby ops

---
 python/cudf/cudf/core/index.py    | 22 ++++++++++++----------
 python/cudf/cudf/core/resample.py | 14 ++++++++++++++
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 8fb617cd5b8..9b14c4b0143 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1439,7 +1439,9 @@ def __repr__(self):
             and isinstance(self, DatetimeIndex)
             and self._freq is not None
         ):
-            keywords.append(f"freq={self._freq}")
+            keywords.append(
+                f"freq={self._freq._maybe_as_fast_pandas_offset().freqstr!r}"
+            )
         keywords = ", ".join(keywords)
         lines.append(f"{prior_to_dtype} {keywords})")
         return "\n".join(lines)
@@ -2705,10 +2707,9 @@ def tz_localize(self, tz, ambiguous="NaT", nonexistent="NaT"):
         >>> tz_naive = cudf.date_range('2018-03-01 09:00', periods=3, freq='D')
         >>> tz_aware = tz_naive.tz_localize("America/New_York")
         >>> tz_aware
-        DatetimeIndex(['2018-03-01 09:00:00-05:00',
-                       '2018-03-02 09:00:00-05:00',
+        DatetimeIndex(['2018-03-01 09:00:00-05:00', '2018-03-02 09:00:00-05:00',
                        '2018-03-03 09:00:00-05:00'],
-                      dtype='datetime64[ns, America/New_York]')
+                      dtype='datetime64[ns, America/New_York]', freq='D')
 
         Ambiguous or nonexistent datetimes are converted to NaT.
 
@@ -2727,14 +2728,16 @@ def tz_localize(self, tz, ambiguous="NaT", nonexistent="NaT"):
         ``ambiguous`` and ``nonexistent`` arguments. Any
         ambiguous or nonexistent timestamps are converted
         to 'NaT'.
-        """
+        """  # noqa: E501
         from cudf.core._internals.timezones import delocalize, localize
 
         if tz is None:
             result_col = delocalize(self._column)
         else:
             result_col = localize(self._column, tz, ambiguous, nonexistent)
-        return DatetimeIndex._from_data({self.name: result_col})
+        return DatetimeIndex._from_data(
+            {self.name: result_col}, freq=self._freq
+        )
 
     def tz_convert(self, tz):
         """
@@ -2759,16 +2762,15 @@ def tz_convert(self, tz):
         >>> dti = cudf.date_range('2018-03-01 09:00', periods=3, freq='D')
         >>> dti = dti.tz_localize("America/New_York")
         >>> dti
-        DatetimeIndex(['2018-03-01 09:00:00-05:00',
-                       '2018-03-02 09:00:00-05:00',
+        DatetimeIndex(['2018-03-01 09:00:00-05:00', '2018-03-02 09:00:00-05:00',
                        '2018-03-03 09:00:00-05:00'],
-                      dtype='datetime64[ns, America/New_York]')
+                      dtype='datetime64[ns, America/New_York]', freq='D')
         >>> dti.tz_convert("Europe/London")
         DatetimeIndex(['2018-03-01 14:00:00+00:00',
                        '2018-03-02 14:00:00+00:00',
                        '2018-03-03 14:00:00+00:00'],
                       dtype='datetime64[ns, Europe/London]')
-        """
+        """  # noqa: E501
         from cudf.core._internals.timezones import convert
 
         if tz is None:
diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py
index eb59cf83926..fbf25104303 100644
--- a/python/cudf/cudf/core/resample.py
+++ b/python/cudf/cudf/core/resample.py
@@ -121,6 +121,10 @@ class _ResampleGrouping(_Grouping):
 
     bin_labels: cudf.core.index.Index
 
+    def __init__(self, obj, by=None, level=None):
+        self._freq = getattr(by, "freq", None)
+        super().__init__(obj, by, level)
+
     def copy(self, deep=True):
         out = super().copy(deep=deep)
         result = _ResampleGrouping.__new__(_ResampleGrouping)
@@ -128,13 +132,22 @@ def copy(self, deep=True):
         result._named_columns = out._named_columns
         result._key_columns = out._key_columns
         result.bin_labels = self.bin_labels.copy(deep=deep)
+        result._freq = self._freq
         return result
 
+    @property
+    def keys(self):
+        index = super().keys
+        if self._freq is not None and isinstance(index, cudf.DatetimeIndex):
+            return cudf.DatetimeIndex._from_data(index._data, freq=self._freq)
+        return index
+
     def serialize(self):
         header, frames = super().serialize()
         labels_head, labels_frames = self.bin_labels.serialize()
         header["__bin_labels"] = labels_head
         header["__bin_labels_count"] = len(labels_frames)
+        header["_freq"] = self._freq
         frames.extend(labels_frames)
         return header, frames
 
@@ -152,6 +165,7 @@ def deserialize(cls, header, frames):
         out.bin_labels = cudf.core.index.Index.deserialize(
             header["__bin_labels"], frames[-header["__bin_labels_count"] :]
         )
+        out._freq = header["_freq"]
         return out
 
     def _handle_frequency_grouper(self, by):