Skip to content

Commit

Permalink
Bug: groupby multiindex levels equals rows (pandas-dev#16859)
Browse files Browse the repository at this point in the history
  • Loading branch information
P-Tillmann authored and jowens committed Sep 20, 2017
1 parent a9574b0 commit 3e31383
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ Groupby/Resample/Rolling
- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`)
- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`)
- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`)

- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)

Sparse
^^^^^^
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,

try:
if isinstance(obj, DataFrame):
all_in_columns = all(g in obj.columns for g in keys)
all_in_columns_index = all(g in obj.columns or g in obj.index.names
for g in keys)
else:
all_in_columns = False
all_in_columns_index = False
except Exception:
all_in_columns = False
all_in_columns_index = False

if not any_callable and not all_in_columns and \
if not any_callable and not all_in_columns_index and \
not any_arraylike and not any_groupers and \
match_axis_length and level is None:
keys = [com._asarray_tuplesafe(keys)]
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3891,6 +3891,19 @@ def predictions(tool):
result = df2.groupby('Key').apply(predictions).p1
tm.assert_series_equal(expected, result)

def test_gb_key_len_equal_axis_len(self):
# GH16843
# test ensures that index and column keys are recognized correctly
# when number of keys equals axis length of groupby
# The frame below has three rows and is grouped by three keys, so the
# number of keys equals the length of the groupby axis.
df = pd.DataFrame([['foo', 'bar', 'B', 1],
['foo', 'bar', 'B', 2],
['foo', 'baz', 'C', 3]],
columns=['first', 'second', 'third', 'one'])
# 'first' and 'second' become index levels while 'third' stays a column,
# so the groupby keys mix index level names with a column label.
df = df.set_index(['first', 'second'])
# df is rebound to the per-group result of size(), looked up below by
# (first, second, third) tuples.
df = df.groupby(['first', 'second', 'third']).size()
# Two input rows share ('foo', 'bar', 'B'); one row has ('foo', 'baz', 'C').
assert df.loc[('foo', 'bar', 'B')] == 2
assert df.loc[('foo', 'baz', 'C')] == 1


def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
tups = lmap(tuple, df[keys].values)
Expand Down

0 comments on commit 3e31383

Please sign in to comment.