From c70c7672851d30177ccb726c4efb1bef9f9bf2b9 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Mon, 12 Nov 2018 14:31:54 -0500
Subject: [PATCH] BUG: Keep column level name in resample nunique

Closes gh-23222
xref #23645
---
 doc/source/reference/groupby.rst             | 1 +
 doc/source/whatsnew/v0.25.0.rst              | 1 +
 pandas/core/groupby/generic.py               | 1 +
 pandas/tests/groupby/test_function.py        | 9 +++++++++
 pandas/tests/resample/test_datetime_index.py | 9 +++++++++
 5 files changed, 21 insertions(+)

diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst
index 6ed85ff2fac436..c7f9113b53c223 100644
--- a/doc/source/reference/groupby.rst
+++ b/doc/source/reference/groupby.rst
@@ -99,6 +99,7 @@ application to columns of a specific data type.
    DataFrameGroupBy.idxmax
    DataFrameGroupBy.idxmin
    DataFrameGroupBy.mad
+   DataFrameGroupBy.nunique
    DataFrameGroupBy.pct_change
    DataFrameGroupBy.plot
    DataFrameGroupBy.quantile
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 170e7f14da3973..ee16246a1421db 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -210,6 +210,7 @@ Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug in :meth:`pandas.core.resample.Resampler.agg` with a timezone aware index where ``OverflowError`` would raise when passing a list of functions (:issue:`22660`)
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`)
 -
 -
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 52056a6842ed9b..683c21f7bd47a6 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1579,6 +1579,7 @@ def groupby_series(obj, col=None):
             from pandas.core.reshape.concat import concat
             results = [groupby_series(obj[col], col) for col in obj.columns]
             results = concat(results, axis=1)
+            results.columns.names = obj.columns.names
 
         if not self.as_index:
             results.index = ibase.default_index(len(results))
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index a884a37840f8ad..1788b29a11082b 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -897,6 +897,15 @@ def test_nunique_with_timegrouper():
     tm.assert_series_equal(result, expected)
 
 
+def test_nunique_preserves_column_level_names():
+    # GH 23222
+    test = pd.DataFrame([1, 2, 2],
+                        columns=pd.Index(['A'], name="level_0"))
+    result = test.groupby([0, 0, 0]).nunique()
+    expected = pd.DataFrame([2], columns=test.columns)
+    tm.assert_frame_equal(result, expected)
+
+
 # count
 # --------------------------------
 
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
index 71b100401ec219..ec662b4f2f1175 100644
--- a/pandas/tests/resample/test_datetime_index.py
+++ b/pandas/tests/resample/test_datetime_index.py
@@ -1135,6 +1135,15 @@ def test_resample_nunique():
     assert_series_equal(result, expected)
 
 
+def test_resample_nunique_preserves_column_level_names():
+    # see gh-23222
+    df = tm.makeTimeDataFrame(freq="1D").abs()
+    df.columns = pd.MultiIndex.from_arrays([df.columns.tolist()] * 2,
+                                           names=["lev0", "lev1"])
+    result = df.resample("1h").nunique()
+    tm.assert_index_equal(df.columns, result.columns)
+
+
 def test_resample_nunique_with_date_gap():
     # GH 13453
     index = pd.date_range('1-1-2000', '2-15-2000', freq='h')