Skip to content

Commit

Permalink
BUG: Maintain column order with groupby.nth
Browse files Browse the repository at this point in the history
  • Loading branch information
reidy-p committed Sep 23, 2018
1 parent fb784ca commit dc2428c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 9 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,8 @@ Groupby/Resample/Rolling
- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
- Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`).
- Bug in :meth:`SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`)

- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`).

Sparse
^^^^^^

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,8 @@ def _set_group_selection(self):

if len(groupers):
# GH12839 clear selected obj cache when group selection changes
self._group_selection = ax.difference(Index(groupers)).tolist()
self._group_selection = ax.difference(Index(groupers),
sort=False).tolist()
self._reset_cache('_selected_obj')

def _set_result_index_ordered(self, result):
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2915,17 +2915,20 @@ def intersection(self, other):
taken.name = None
return taken

def difference(self, other):
def difference(self, other, sort=True):
"""
Return a new Index with elements from the index that are not in
`other`.
This is the set difference of two Index objects.
It's sorted if sorting is possible and `sort` is True.
Parameters
----------
other : Index or array-like
sort : bool, default True
Sort the resulting index if possible
.. versionadded:: 0.24.0
Returns
-------
Expand All @@ -2934,10 +2937,12 @@ def difference(self, other):
Examples
--------
>>> idx1 = pd.Index([1, 2, 3, 4])
>>> idx1 = pd.Index([2, 1, 3, 4])
>>> idx2 = pd.Index([3, 4, 5, 6])
>>> idx1.difference(idx2)
Int64Index([1, 2], dtype='int64')
>>> idx1.difference(idx2, sort=False)
Int64Index([2, 1], dtype='int64')
"""
self._assert_can_do_setop(other)
Expand All @@ -2955,10 +2960,11 @@ def difference(self, other):
label_diff = np.setdiff1d(np.arange(this.size), indexer,
assume_unique=True)
the_diff = this.values.take(label_diff)
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
pass
if sort:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
pass

return this._shallow_copy(the_diff, name=result_name, freq=None)

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/groupby/test_nth.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,3 +390,27 @@ def test_nth_empty():
names=['a', 'b']),
columns=['c'])
assert_frame_equal(result, expected)


def test_nth_column_order():
    """Check that ``groupby(...).nth`` keeps the frame's column order.

    Regression test for GH 20760. The non-grouping columns are deliberately
    declared as ``['C', 'B']`` (not alphabetical), and the expected frames
    list them in that same order.
    """
    # GH 20760
    source_rows = [[1, 'b', 100],
                   [1, 'a', 50],
                   [1, 'a', np.nan],
                   [2, 'c', 200],
                   [2, 'd', 150]]
    frame = DataFrame(source_rows, columns=['A', 'C', 'B'])

    # (positional args for nth, keyword args for nth, expected rows)
    cases = [
        ((0,), {}, [['b', 100.0],
                    ['c', 200.0]]),
        ((-1,), {'dropna': 'any'}, [['a', 50.0],
                                    ['d', 150.0]]),
    ]

    for nth_args, nth_kwargs, wanted_rows in cases:
        # Build a fresh groupby for every case, as each call is independent.
        result = frame.groupby('A').nth(*nth_args, **nth_kwargs)
        expected = DataFrame(wanted_rows,
                             columns=['C', 'B'],
                             index=Index([1, 2], name='A'))
        assert_frame_equal(result, expected)

0 comments on commit dc2428c

Please sign in to comment.