Skip to content

Commit

Permalink
Merge pull request #3027 from jreback/take
Browse files Browse the repository at this point in the history
BUG:  Bug in user-facing take with negative indicies was incorrect
  • Loading branch information
jreback committed Mar 12, 2013
2 parents 2f7b0e4 + b59bf6c commit a79f08c
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 16 deletions.
5 changes: 4 additions & 1 deletion RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ pandas 0.11.0
- Bug in value_counts of ``datetime64[ns]`` Series (GH3002_)
- Fixed printing of ``NaT` in an index
- Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__)
- Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_)
- Bug in ``icol, take`` with negative indicies was producing incorrect return
values (see GH2922_, GH2892_), also check for out-of-bounds indices (GH3029_)
- Bug in DataFrame column insertion when the column creation fails, existing frame is left in
an irrecoverable state (GH3010_)
- Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_)
Expand All @@ -161,6 +162,7 @@ pandas 0.11.0
.. _GH2807: https://github.com/pydata/pandas/issues/2807
.. _GH2849: https://github.com/pydata/pandas/issues/2849
.. _GH2898: https://github.com/pydata/pandas/issues/2898
.. _GH2892: https://github.com/pydata/pandas/issues/2892
.. _GH2909: https://github.com/pydata/pandas/issues/2909
.. _GH2922: https://github.com/pydata/pandas/issues/2922
.. _GH2931: https://github.com/pydata/pandas/issues/2931
Expand All @@ -171,6 +173,7 @@ pandas 0.11.0
.. _GH3002: https://github.com/pydata/pandas/issues/3002
.. _GH3010: https://github.com/pydata/pandas/issues/3010
.. _GH3012: https://github.com/pydata/pandas/issues/3012
.. _GH3029: https://github.com/pydata/pandas/issues/3029


pandas 0.10.1
Expand Down
16 changes: 7 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
from pandas.core.generic import NDFrame
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
_is_index_slice, _check_bool_indexer)
_is_index_slice, _check_bool_indexer,
_maybe_convert_indices)
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.series import Series, _radd_compat
import pandas.core.expressions as expressions
Expand Down Expand Up @@ -1928,11 +1929,6 @@ def _ixs(self, i, axis=0, copy=False):
label = self.columns[i]
if isinstance(label, Index):

# if we have negative indicies, translate to postive here
# (take doesen't deal properly with these)
l = len(self.columns)
i = [ v if v >= 0 else l+v for v in i ]

return self.take(i, axis=1)

values = self._data.iget(i)
Expand Down Expand Up @@ -2911,11 +2907,13 @@ def take(self, indices, axis=0):
-------
taken : DataFrame
"""
if isinstance(indices, list):
indices = np.array(indices)

# check/convert indicies here
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))

if self._is_mixed_type:
if axis == 0:
new_data = self._data.take(indices, axis=1)
new_data = self._data.take(indices, axis=1, verify=False)
return DataFrame(new_data)
else:
new_columns = self.columns.take(indices)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pandas.core.index import MultiIndex
import pandas.core.indexing as indexing
from pandas.core.indexing import _maybe_convert_indices
from pandas.tseries.index import DatetimeIndex
import pandas.core.common as com
import pandas.lib as lib
Expand Down Expand Up @@ -943,12 +944,16 @@ def take(self, indices, axis=0):
-------
taken : type of caller
"""

# check/convert indicies here
indices = _maybe_convert_indices(indices, len(self._get_axis(axis)))

if axis == 0:
labels = self._get_axis(axis)
new_items = labels.take(indices)
new_data = self._data.reindex_axis(new_items, axis=0)
else:
new_data = self._data.take(indices, axis=axis)
new_data = self._data.take(indices, axis=axis, verify=False)
return self._constructor(new_data)

def tz_convert(self, tz, axis=0, copy=True):
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,20 @@ def _is_series(obj):
return isinstance(obj, Series)


def _maybe_convert_indices(indices, n):
""" if we have negative indicies, translate to postive here
if have indicies that are out-of-bounds, raise an IndexError """
if isinstance(indices, list):
indices = np.array(indices)

mask = indices<0
if mask.any():
indices[mask] += n
mask = (indices>=n) | (indices<0)
if mask.any():
raise IndexError("indices are out-of-bounds")
return indices

def _maybe_convert_ix(*args):
"""
We likely want to take the cross-product
Expand Down
9 changes: 6 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np

from pandas.core.index import Index, _ensure_index, _handle_legacy_indexes
from pandas.core.indexing import _check_slice_bounds
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
import pandas.core.common as com
import pandas.lib as lib
import pandas.tslib as tslib
Expand Down Expand Up @@ -1517,13 +1517,16 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
na_block = make_block(block_values, items, ref_items)
return na_block

def take(self, indexer, axis=1):
def take(self, indexer, axis=1, verify=True):
if axis < 1:
raise AssertionError('axis must be at least 1, got %d' % axis)

indexer = com._ensure_platform_int(indexer)

n = len(self.axes[axis])

if verify:
indexer = _maybe_convert_indices(indexer, n)

if ((indexer == -1) | (indexer >= n)).any():
raise Exception('Indices must be nonzero and less than '
'the axis length')
Expand Down
46 changes: 45 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8615,12 +8615,43 @@ def test_fillna_col_reordering(self):
self.assert_(df.columns.tolist() == filled.columns.tolist())

def test_take(self):

# homogeneous
#----------------------------------------
order = [3, 1, 2, 0]
for df in [self.frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.ix[:, ['D', 'B', 'C', 'A']]
assert_frame_equal(result, expected, check_names=False)

# neg indicies
order = [2,1,-1]
for df in [self.frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.ix[:, ['C', 'B', 'D']]
assert_frame_equal(result, expected, check_names=False)

# illegal indices
self.assertRaises(IndexError, df.take, [3,1,2,30], axis=0)
self.assertRaises(IndexError, df.take, [3,1,2,-31], axis=0)
self.assertRaises(IndexError, df.take, [3,1,2,5], axis=1)
self.assertRaises(IndexError, df.take, [3,1,2,-5], axis=1)

# mixed-dtype
#----------------------------------------
order = [4, 1, 2, 0, 3]
order = [4, 1, 2, 0, 3]
for df in [self.mixed_frame]:

result = df.take(order, axis=0)
Expand All @@ -8632,6 +8663,19 @@ def test_take(self):
expected = df.ix[:, ['foo', 'B', 'C', 'A', 'D']]
assert_frame_equal(result, expected)

# neg indicies
order = [4,1,-2]
for df in [self.mixed_frame]:

result = df.take(order, axis=0)
expected = df.reindex(df.index.take(order))
assert_frame_equal(result, expected)

# axis = 1
result = df.take(order, axis=1)
expected = df.ix[:, ['foo', 'B', 'D']]
assert_frame_equal(result, expected)

# by dtype
order = [1, 2, 0, 3]
for df in [self.mixed_float,self.mixed_int]:
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,7 +1013,11 @@ def test_take(self):
expected = self.panel.reindex(minor=['D', 'A', 'B', 'C'])
assert_panel_equal(result, expected)

self.assertRaises(Exception, self.panel.take, [3, -1, 1, 2], axis=2)
# neg indicies ok
expected = self.panel.reindex(minor=['D', 'D', 'B', 'C'])
result = self.panel.take([3, -1, 1, 2], axis=2)
assert_panel_equal(result, expected)

self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2)

def test_sort_index(self):
Expand Down

0 comments on commit a79f08c

Please sign in to comment.