From b59bf6c7eac082b903e881ed9da351379aaa7eed Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 12 Mar 2013 14:18:10 -0400 Subject: [PATCH] BUG: Bug in user-facing take with negative indicies was incorrect producing incorrect return values/failing (GH2892_) BUG: extended negative indicies with take to generic.py (Panel/Series) ENH: add out-of-bounds checking on use-facing take --- RELEASE.rst | 5 ++++- pandas/core/frame.py | 16 ++++++------- pandas/core/generic.py | 7 +++++- pandas/core/indexing.py | 14 ++++++++++++ pandas/core/internals.py | 9 +++++--- pandas/tests/test_frame.py | 46 +++++++++++++++++++++++++++++++++++++- pandas/tests/test_panel.py | 6 ++++- 7 files changed, 87 insertions(+), 16 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 970b89c99a7b1..a4955db02096e 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -140,7 +140,8 @@ pandas 0.11.0 - Bug in value_counts of ``datetime64[ns]`` Series (GH3002_) - Fixed printing of ``NaT` in an index - Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__) - - Bug in ``icol`` with negative indicies was incorrect producing incorrect return values (see GH2922_) + - Bug in ``icol, take`` with negative indicies was producing incorrect return + values (see GH2922_, GH2892_), also check for out-of-bounds indices (GH3029_) - Bug in DataFrame column insertion when the column creation fails, existing frame is left in an irrecoverable state (GH3010_) @@ -160,6 +161,7 @@ pandas 0.11.0 .. _GH2807: https://github.com/pydata/pandas/issues/2807 .. _GH2849: https://github.com/pydata/pandas/issues/2849 .. _GH2898: https://github.com/pydata/pandas/issues/2898 +.. _GH2892: https://github.com/pydata/pandas/issues/2892 .. _GH2909: https://github.com/pydata/pandas/issues/2909 .. _GH2922: https://github.com/pydata/pandas/issues/2922 .. _GH2931: https://github.com/pydata/pandas/issues/2931 @@ -170,6 +172,7 @@ pandas 0.11.0 .. _GH3002: https://github.com/pydata/pandas/issues/3002 .. _GH3010: https://github.com/pydata/pandas/issues/3010 .. _GH3012: https://github.com/pydata/pandas/issues/3012 +.. _GH3029: https://github.com/pydata/pandas/issues/3029 pandas 0.10.1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7ae2cc6d5b6ed..609a04fdb15eb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -28,7 +28,8 @@ from pandas.core.generic import NDFrame from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels, - _is_index_slice, _check_bool_indexer) + _is_index_slice, _check_bool_indexer, + _maybe_convert_indices) from pandas.core.internals import BlockManager, make_block, form_blocks from pandas.core.series import Series, _radd_compat import pandas.core.expressions as expressions @@ -1928,11 +1929,6 @@ def _ixs(self, i, axis=0, copy=False): label = self.columns[i] if isinstance(label, Index): - # if we have negative indicies, translate to postive here - # (take doesen't deal properly with these) - l = len(self.columns) - i = [ v if v >= 0 else l+v for v in i ] - return self.take(i, axis=1) values = self._data.iget(i) @@ -2911,11 +2907,13 @@ def take(self, indices, axis=0): ------- taken : DataFrame """ - if isinstance(indices, list): - indices = np.array(indices) + + # check/convert indicies here + indices = _maybe_convert_indices(indices, len(self._get_axis(axis))) + if self._is_mixed_type: if axis == 0: - new_data = self._data.take(indices, axis=1) + new_data = self._data.take(indices, axis=1, verify=False) return DataFrame(new_data) else: new_columns = self.columns.take(indices) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 236d7d8aeadf8..093f600f8c4ea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4,6 +4,7 @@ from pandas.core.index import MultiIndex import pandas.core.indexing as indexing +from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com import pandas.lib as lib @@ -943,12 +944,16 @@ def take(self, indices, axis=0): ------- taken : type of caller """ + + # check/convert indicies here + indices = _maybe_convert_indices(indices, len(self._get_axis(axis))) + if axis == 0: labels = self._get_axis(axis) new_items = labels.take(indices) new_data = self._data.reindex_axis(new_items, axis=0) else: - new_data = self._data.take(indices, axis=axis) + new_data = self._data.take(indices, axis=axis, verify=False) return self._constructor(new_data) def tz_convert(self, tz, axis=0, copy=True): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b86518e8947ef..17423075b6bfb 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -913,6 +913,20 @@ def _is_series(obj): return isinstance(obj, Series) +def _maybe_convert_indices(indices, n): + """ if we have negative indicies, translate to postive here + if have indicies that are out-of-bounds, raise an IndexError """ + if isinstance(indices, list): + indices = np.array(indices) + + mask = indices<0 + if mask.any(): + indices[mask] += n + mask = (indices>=n) | (indices<0) + if mask.any(): + raise IndexError("indices are out-of-bounds") + return indices + def _maybe_convert_ix(*args): """ We likely want to take the cross-product diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 5c0f0935346cc..96cc41be26b92 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -5,7 +5,7 @@ import numpy as np from pandas.core.index import Index, _ensure_index, _handle_legacy_indexes -from pandas.core.indexing import _check_slice_bounds +from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib @@ -1517,13 +1517,16 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan): na_block = make_block(block_values, items, ref_items) return na_block - def take(self, indexer, axis=1): + def take(self, indexer, axis=1, verify=True): if axis < 1: raise AssertionError('axis must be at least 1, got %d' % axis) indexer = com._ensure_platform_int(indexer) - n = len(self.axes[axis]) + + if verify: + indexer = _maybe_convert_indices(indexer, n) + if ((indexer == -1) | (indexer >= n)).any(): raise Exception('Indices must be nonzero and less than ' 'the axis length') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 85f66148eba8a..903ae1040413f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8602,12 +8602,43 @@ def test_fillna_col_reordering(self): self.assert_(df.columns.tolist() == filled.columns.tolist()) def test_take(self): + # homogeneous #---------------------------------------- + order = [3, 1, 2, 0] + for df in [self.frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.ix[:, ['D', 'B', 'C', 'A']] + assert_frame_equal(result, expected, check_names=False) + + # neg indicies + order = [2,1,-1] + for df in [self.frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.ix[:, ['C', 'B', 'D']] + assert_frame_equal(result, expected, check_names=False) + + # illegal indices + self.assertRaises(IndexError, df.take, [3,1,2,30], axis=0) + self.assertRaises(IndexError, df.take, [3,1,2,-31], axis=0) + self.assertRaises(IndexError, df.take, [3,1,2,5], axis=1) + self.assertRaises(IndexError, df.take, [3,1,2,-5], axis=1) # mixed-dtype #---------------------------------------- - order = [4, 1, 2, 0, 3] + order = [4, 1, 2, 0, 3] for df in [self.mixed_frame]: result = df.take(order, axis=0) @@ -8619,6 +8650,19 @@ def test_take(self): expected = df.ix[:, ['foo', 'B', 'C', 'A', 'D']] assert_frame_equal(result, expected) + # neg indicies + order = [4,1,-2] + for df in [self.mixed_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.ix[:, ['foo', 'B', 'D']] + assert_frame_equal(result, expected) + # by dtype order = [1, 2, 0, 3] for df in [self.mixed_float,self.mixed_int]: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index b3aa38b9a972e..d857e999bdd33 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1013,7 +1013,11 @@ def test_take(self): expected = self.panel.reindex(minor=['D', 'A', 'B', 'C']) assert_panel_equal(result, expected) - self.assertRaises(Exception, self.panel.take, [3, -1, 1, 2], axis=2) + # neg indicies ok + expected = self.panel.reindex(minor=['D', 'D', 'B', 'C']) + result = self.panel.take([3, -1, 1, 2], axis=2) + assert_panel_equal(result, expected) + self.assertRaises(Exception, self.panel.take, [4, 0, 1, 2], axis=2) def test_sort_index(self):