From 3e3138341bf7bb011c2484f4d2246f88bac3bfb3 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Thu, 24 Aug 2017 12:38:27 +0200 Subject: [PATCH] Bug: groupby multiindex levels equals rows (#16859) closes #16843 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/groupby.py | 9 +++++---- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 202fd2c13718f..2850c98c64d78 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -384,7 +384,7 @@ Groupby/Resample/Rolling - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) - Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) - +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index aa7c4517c0a01..c23b00dc740a4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, try: if isinstance(obj, DataFrame): - all_in_columns = all(g in obj.columns for g in keys) + all_in_columns_index = all(g in obj.columns or g in obj.index.names + for g in keys) else: - all_in_columns = False + all_in_columns_index = False except Exception: - all_in_columns = False + all_in_columns_index = False - if not any_callable and not all_in_columns and \ + if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com._asarray_tuplesafe(keys)] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f9e1a0d2e744a..8957beacab376 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3891,6 +3891,19 @@ def predictions(tool): result = df2.groupby('Key').apply(predictions).p1 tm.assert_series_equal(expected, result) + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = pd.DataFrame([['foo', 'bar', 'B', 1], + ['foo', 'bar', 'B', 2], + ['foo', 'baz', 'C', 3]], + columns=['first', 'second', 'third', 'one']) + df = df.set_index(['first', 'second']) + df = df.groupby(['first', 'second', 'third']).size() + assert df.loc[('foo', 'bar', 'B')] == 2 + assert df.loc[('foo', 'baz', 'C')] == 1 + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values)