diff --git a/doc/source/api.rst b/doc/source/api.rst index 6b188deb9eb42..2f4fd860f270a 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -434,9 +434,8 @@ Reshaping, sorting :toctree: generated/ Series.argsort - Series.order Series.reorder_levels - Series.sort + Series.sort_values Series.sort_index Series.sortlevel Series.swaplevel @@ -908,7 +907,7 @@ Reshaping, sorting, transposing DataFrame.pivot DataFrame.reorder_levels - DataFrame.sort + DataFrame.sort_values DataFrame.sort_index DataFrame.sortlevel DataFrame.nlargest @@ -1293,7 +1292,6 @@ Modifying and Computations Index.insert Index.min Index.max - Index.order Index.reindex Index.repeat Index.take @@ -1319,8 +1317,7 @@ Sorting :toctree: generated/ Index.argsort - Index.order - Index.sort + Index.sort_values Time-specific operations ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 624e10b431de5..1209cb9934e82 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1418,29 +1418,48 @@ description. .. _basics.sorting: -Sorting by index and value --------------------------- +Sorting +------- + +.. warning:: + + The sorting API has changed substantially in 0.17.0, see :ref:`here <whatsnew_0170.api_breaking.sorting>` for these changes. + In particular, all sorting methods now return a new object by default, and **DO NOT** operate in-place (except by passing ``inplace=True``). There are two obvious kinds of sorting that you may be interested in: sorting -by label and sorting by actual values. The primary method for sorting axis -labels (indexes) across data structures is the :meth:`~DataFrame.sort_index` method. +by label and sorting by actual values. + +By Index +~~~~~~~~ + +The primary methods for sorting axis +labels (indexes) are the ``Series.sort_index()`` and the ``DataFrame.sort_index()`` methods. ..
ipython:: python unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'], columns=['three', 'two', 'one']) + + # DataFrame unsorted_df.sort_index() unsorted_df.sort_index(ascending=False) unsorted_df.sort_index(axis=1) -:meth:`DataFrame.sort_index` can accept an optional ``by`` argument for ``axis=0`` + # Series + unsorted_df['three'].sort_index() + +By Values +~~~~~~~~~ + +The :meth:`Series.sort_values` and :meth:`DataFrame.sort_values` are the entry points for **value** sorting (that is the values in a column or row). +:meth:`DataFrame.sort_values` can accept an optional ``by`` argument for ``axis=0`` which will use an arbitrary vector or a column name of the DataFrame to determine the sort order: .. ipython:: python df1 = pd.DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]}) - df1.sort_index(by='two') + df1.sort_values(by='two') The ``by`` argument can take a list of column names, e.g.: @@ -1448,9 +1467,7 @@ The ``by`` argument can take a list of column names, e.g.: df1[['one', 'two', 'three']].sort_index(by=['one','two']) -Series has the method :meth:`~Series.order` (analogous to `R's order function -`__) which -sorts by value, with special treatment of NA values via the ``na_position`` +These methods have special treatment of NA values via the ``na_position`` argument: .. ipython:: python @@ -1459,11 +1476,11 @@ argument: s.order() s.order(na_position='first') -.. note:: - :meth:`Series.sort` sorts a Series by value in-place. This is to provide - compatibility with NumPy methods which expect the ``ndarray.sort`` - behavior. :meth:`Series.order` returns a copy of the sorted data. +.. _basics.searchsorted: + +searchsorted +~~~~~~~~~~~~ Series has the :meth:`~Series.searchsorted` method, which works similar to :meth:`numpy.ndarray.searchsorted`. @@ -1493,7 +1510,7 @@ faster than sorting the entire Series and calling ``head(n)`` on the result. 
s = pd.Series(np.random.permutation(10)) s - s.order() + s.sort_values() s.nsmallest(3) s.nlargest(3) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 7e69a8044a305..bba1db0e25349 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -14,6 +14,7 @@ users upgrade to this version. Highlights include: - Release the Global Interpreter Lock (GIL) on some cython operations, see :ref:`here ` +- The sorting API has been revamped to remove some long-time inconsistencies, see :ref:`here <whatsnew_0170.api_breaking.sorting>` - The default for ``to_datetime`` will now be to ``raise`` when presented with unparseable formats, previously this would return the original input, see :ref:`here <whatsnew_0170.api_breaking.to_datetime>` - The default for ``dropna`` in ``HDFStore`` has changed to ``False``, to store by default all rows even @@ -187,6 +188,65 @@ Other enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0170.api_breaking.sorting: + +Changes to sorting API +^^^^^^^^^^^^^^^^^^^^^^ + +The sorting API has had some longtime inconsistencies (:issue:`9816`, :issue:`8239`). + +Here is a summary of the API **prior** to 0.17.0: + +- ``Series.sort`` was **INPLACE** while ``DataFrame.sort`` returned a new object. +- ``Series.order`` returned a new object. +- It was possible to use ``Series/DataFrame.sort_index`` to sort by **values** by passing the ``by`` keyword. +- ``Series/DataFrame.sortlevel`` worked only on a ``MultiIndex`` for sorting by index. + +To address these issues, we have revamped the API: + +- We have introduced a new method, :meth:`DataFrame.sort_values`, which is the merger of ``DataFrame.sort()``, ``Series.sort()``, + and ``Series.order``, to handle sorting of **values**. +- The existing method ``Series.sort()`` has been deprecated and will be removed in a + future version of pandas. +- The ``by`` argument of ``DataFrame.sort_index()`` has been deprecated and will be removed in a future version of pandas.
+- The methods ``DataFrame.sort()`` and ``Series.order()`` are no longer recommended and will carry a deprecation warning + in the doc-string. +- The existing method ``.sort_index()`` will gain the ``level`` keyword to enable level sorting. + +We now have two distinct and non-overlapping methods of sorting. A ``*`` marks items that +will show a ``FutureWarning``. + +To sort by the **values**: + +================================= ==================================== +Previous Replacement +================================= ==================================== +\*``Series.order()`` ``Series.sort_values()`` +\*``Series.sort()`` ``Series.sort_values(inplace=True)`` +\*``DataFrame.sort(columns=...)`` ``DataFrame.sort_values(by=...)`` +================================= ==================================== + +To sort by the **index**: + +================================== ==================================== +Previous Equivalent +================================== ==================================== +``Series.sort_index()`` ``Series.sort_index()`` +``Series.sortlevel(level=...)`` ``Series.sort_index(level=...)`` +``DataFrame.sort_index()`` ``DataFrame.sort_index()`` +``DataFrame.sortlevel(level=...)`` ``DataFrame.sort_index(level=...)`` +\*``DataFrame.sort()`` ``DataFrame.sort_index()`` +================================== ==================================== + +We have also deprecated and changed similar methods in two Series-like classes, ``Index`` and ``Categorical``. + +================================== ==================================== +Previous Replacement +================================== ==================================== +\*``Index.order()`` ``Index.sort_values()`` +\*``Categorical.order()`` ``Categorical.sort_values()`` +================================== ==================================== + ..
_whatsnew_0170.api_breaking.to_datetime: Changes to to_datetime and to_timedelta @@ -570,7 +630,7 @@ Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - Remove use of some deprecated numpy comparison operations, mainly in tests. (:issue:`10569`) - +- Removal of ``na_last`` parameters from ``Series.order()`` and ``Series.sort()``, in favor of ``na_position``, xref (:issue:`5231`) .. _whatsnew_0170.performance: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b0c7ff43bc7d8..4bcb24b684ed1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -262,9 +262,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, result.index = bins[:-1] if sort: - result.sort() - if not ascending: - result = result[::-1] + result = result.sort_values(ascending=ascending) if normalize: result = result / float(values.size) @@ -497,7 +495,7 @@ def select_n_slow(dropped, n, take_last, method): reverse_it = take_last or method == 'nlargest' ascending = method == 'nsmallest' slc = np.s_[::-1] if reverse_it else np.s_[:] - return dropped[slc].order(ascending=ascending).head(n) + return dropped[slc].sort_values(ascending=ascending).head(n) _select_methods = {'nsmallest': nsmallest, 'nlargest': nlargest} diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index c9e30ea31dab8..0e61170471dcc 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1083,7 +1083,7 @@ def argsort(self, ascending=True, **kwargs): result = result[::-1] return result - def order(self, inplace=False, ascending=True, na_position='last'): + def sort_values(self, inplace=False, ascending=True, na_position='last'): """ Sorts the Category by category value returning a new Categorical by default. Only ordered Categoricals can be sorted! 
@@ -1092,10 +1092,10 @@ def order(self, inplace=False, ascending=True, na_position='last'): Parameters ---------- - ascending : boolean, default True - Sort ascending. Passing False sorts descending inplace : boolean, default False Do operation in place. + ascending : boolean, default True + Sort ascending. Passing False sorts descending na_position : {'first', 'last'} (optional, default='last') 'first' puts NaNs at the beginning 'last' puts NaNs at the end @@ -1139,6 +1139,37 @@ def order(self, inplace=False, ascending=True, na_position='last'): return Categorical(values=codes,categories=self.categories, ordered=self.ordered, fastpath=True) + def order(self, inplace=False, ascending=True, na_position='last'): + """ + DEPRECATED: use :meth:`Categorical.sort_values` + + Sorts the Category by category value returning a new Categorical by default. + + Only ordered Categoricals can be sorted! + + Categorical.sort is the equivalent but sorts the Categorical inplace. + + Parameters + ---------- + inplace : boolean, default False + Do operation in place. + ascending : boolean, default True + Sort ascending. Passing False sorts descending + na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + + Returns + ------- + y : Category or None + + See Also + -------- + Category.sort + """ + warn("order is deprecated, use sort_values(...)", + FutureWarning, stacklevel=2) + return self.sort_values(inplace=inplace, ascending=ascending, na_position=na_position) def sort(self, inplace=True, ascending=True, na_position='last'): """ Sorts the Category inplace by category value. 
@@ -1163,10 +1194,10 @@ def sort(self, inplace=True, ascending=True, na_position='last'): See Also -------- - Category.order + Category.sort_values """ - return self.order(inplace=inplace, ascending=ascending, - na_position=na_position) + return self.sort_values(inplace=inplace, ascending=ascending, + na_position=na_position) def ravel(self, order='C'): """ Return a flattened (numpy) array. diff --git a/pandas/core/common.py b/pandas/core/common.py index aaa341240f538..53cd5ca9aa78b 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2155,6 +2155,9 @@ def _mut_exclusive(**kwargs): return val2 +def _not_none(*args): + return (arg for arg in args if arg is not None) + def _any_none(*args): for arg in args: if arg is None: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 062cbe579785c..2bec83d7b094d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -23,7 +23,7 @@ import numpy as np import numpy.ma as ma -from pandas.core.common import (isnull, notnull, PandasError, _try_sort, +from pandas.core.common import (isnull, notnull, PandasError, _try_sort, _not_none, _default_index, _maybe_upcast, is_sequence, _infer_dtype_from_scalar, _values_from_object, is_list_like, _maybe_box_datetimelike, @@ -2949,9 +2949,71 @@ def f(vals): #---------------------------------------------------------------------- # Sorting + @Appender(_shared_docs['sort_values'] % _shared_doc_kwargs) + def sort_values(self, by, axis=0, ascending=True, inplace=False, + kind='quicksort', na_position='last'): + + axis = self._get_axis_number(axis) + labels = self._get_axis(axis) + + if axis != 0: + raise ValueError('When sorting by column, axis must be 0 ' + '(rows)') + if not isinstance(by, list): + by = [by] + if com.is_sequence(ascending) and len(by) != len(ascending): + raise ValueError('Length of ascending (%d) != length of by' + ' (%d)' % (len(ascending), len(by))) + if len(by) > 1: + from pandas.core.groupby import _lexsort_indexer + + def trans(v): + if 
com.needs_i8_conversion(v): + return v.view('i8') + return v + keys = [] + for x in by: + k = self[x].values + if k.ndim == 2: + raise ValueError('Cannot sort by duplicate column %s' % str(x)) + keys.append(trans(k)) + indexer = _lexsort_indexer(keys, orders=ascending, + na_position=na_position) + indexer = com._ensure_platform_int(indexer) + else: + from pandas.core.groupby import _nargsort + + by = by[0] + k = self[by].values + if k.ndim == 2: + + # try to be helpful + if isinstance(self.columns, MultiIndex): + raise ValueError('Cannot sort by column %s in a multi-index' + ' you need to explicity provide all the levels' + % str(by)) + + raise ValueError('Cannot sort by duplicate column %s' + % str(by)) + if isinstance(ascending, (tuple, list)): + ascending = ascending[0] + + indexer = _nargsort(k, kind=kind, ascending=ascending, + na_position=na_position) + + new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), + convert=False, verify=False) + + if inplace: + return self._update_inplace(new_data) + else: + return self._constructor(new_data).__finalize__(self) + def sort(self, columns=None, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): """ + DEPRECATED: use :meth:`DataFrame.sort_values` + Sort DataFrame either by labels (along either axis) or by the values in column(s) @@ -2982,93 +3044,41 @@ def sort(self, columns=None, axis=0, ascending=True, ------- sorted : DataFrame """ - return self.sort_index(by=columns, axis=axis, ascending=ascending, - inplace=inplace, kind=kind, na_position=na_position) - def sort_index(self, axis=0, by=None, ascending=True, inplace=False, - kind='quicksort', na_position='last'): - """ - Sort DataFrame either by labels (along either axis) or by the values in - a column + if columns is None: + warnings.warn("sort(....) 
is deprecated, use sort_index(.....)", + FutureWarning, stacklevel=2) + return self.sort_index(axis=axis, ascending=ascending, inplace=inplace) - Parameters - ---------- - axis : {0 or 'index', 1 or 'columns'}, default 0 - Sort index/rows versus columns - by : object - Column name(s) in frame. Accepts a column name or a list - for a nested sort. A tuple will be interpreted as the - levels of a multi-index. - ascending : boolean or list, default True - Sort ascending vs. descending. Specify list for multiple sort - orders - inplace : boolean, default False - Sort the DataFrame without creating a new instance - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - kind : {'quicksort', 'mergesort', 'heapsort'}, optional - This option is only applied when sorting on a single column or label. + warnings.warn("sort(columns=....) is deprecated, use sort_values(by=.....)", + FutureWarning, stacklevel=2) + return self.sort_values(by=columns, axis=axis, ascending=ascending, + inplace=inplace, kind=kind, na_position=na_position) - Examples - -------- - >>> result = df.sort_index(by=['A', 'B'], ascending=[True, False]) + @Appender(_shared_docs['sort_index'] % _shared_doc_kwargs) + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, + kind='quicksort', na_position='last', sort_remaining=True, by=None): - Returns - ------- - sorted : DataFrame - """ + # 10726 + if by is not None: + warnings.warn("by argument to sort_index is deprecated, pls use .sort_values(by=...)", + FutureWarning, stacklevel=2) + if level is not None: + raise ValueError("unable to simultaneously sort by and level") + return self.sort_values(by, axis=axis, ascending=ascending, inplace=inplace) - from pandas.core.groupby import _lexsort_indexer, _nargsort - axis = self._get_axis_number(axis) - if axis not in [0, 1]: # pragma: no cover - raise AssertionError('Axis must be 0 or 1, got %s' % str(axis)) + axis = 
self._get_axis_number(axis) labels = self._get_axis(axis) - if by is not None: - if axis != 0: - raise ValueError('When sorting by column, axis must be 0 ' - '(rows)') - if not isinstance(by, list): - by = [by] - if com.is_sequence(ascending) and len(by) != len(ascending): - raise ValueError('Length of ascending (%d) != length of by' - ' (%d)' % (len(ascending), len(by))) - if len(by) > 1: - def trans(v): - if com.needs_i8_conversion(v): - return v.view('i8') - return v - keys = [] - for x in by: - k = self[x].values - if k.ndim == 2: - raise ValueError('Cannot sort by duplicate column %s' % str(x)) - keys.append(trans(k)) - indexer = _lexsort_indexer(keys, orders=ascending, - na_position=na_position) - indexer = com._ensure_platform_int(indexer) - else: - by = by[0] - k = self[by].values - if k.ndim == 2: - - # try to be helpful - if isinstance(self.columns, MultiIndex): - raise ValueError('Cannot sort by column %s in a multi-index' - ' you need to explicity provide all the levels' - % str(by)) - - raise ValueError('Cannot sort by duplicate column %s' - % str(by)) - if isinstance(ascending, (tuple, list)): - ascending = ascending[0] + # sort by the index + if level is not None: - indexer = _nargsort(k, kind=kind, ascending=ascending, - na_position=na_position) + new_axis, indexer = labels.sortlevel(level, ascending=ascending, + sort_remaining=sort_remaining) elif isinstance(labels, MultiIndex): + from pandas.core.groupby import _lexsort_indexer # make sure that the axis is lexsorted to start # if not we need to reconstruct to get the correct indexer @@ -3077,13 +3087,13 @@ def trans(v): indexer = _lexsort_indexer(labels.labels, orders=ascending, na_position=na_position) - indexer = com._ensure_platform_int(indexer) else: + from pandas.core.groupby import _nargsort + indexer = _nargsort(labels, kind=kind, ascending=ascending, na_position=na_position) - bm_axis = self._get_block_manager_axis(axis) - new_data = self._data.take(indexer, axis=bm_axis, + new_data = 
self._data.take(indexer, axis=self._get_block_manager_axis(axis), convert=False, verify=False) if inplace: @@ -3111,30 +3121,15 @@ def sortlevel(self, level=0, axis=0, ascending=True, Returns ------- sorted : DataFrame - """ - axis = self._get_axis_number(axis) - the_axis = self._get_axis(axis) - if not isinstance(the_axis, MultiIndex): - raise TypeError('can only sort by level with a hierarchical index') - new_axis, indexer = the_axis.sortlevel(level, ascending=ascending, - sort_remaining=sort_remaining) - - if self._is_mixed_type and not inplace: - ax = 'index' if axis == 0 else 'columns' + See Also + -------- + DataFrame.sort_index(level=...) - if new_axis.is_unique: - return self.reindex(**{ax: new_axis}) - else: - return self.take(indexer, axis=axis, convert=False) + """ + return self.sort_index(level=level, axis=axis, ascending=ascending, + inplace=inplace, sort_remaining=sort_remaining) - bm_axis = self._get_block_manager_axis(axis) - new_data = self._data.take(indexer, axis=bm_axis, - convert=False, verify=False) - if inplace: - return self._update_inplace(new_data) - else: - return self._constructor(new_data).__finalize__(self) def _nsorted(self, columns, n, method, take_last): if not com.is_list_like(columns): @@ -3142,8 +3137,8 @@ def _nsorted(self, columns, n, method, take_last): columns = list(columns) ser = getattr(self[columns[0]], method)(n, take_last=take_last) ascending = dict(nlargest=False, nsmallest=True)[method] - return self.loc[ser.index].sort(columns, ascending=ascending, - kind='mergesort') + return self.loc[ser.index].sort_values(columns, ascending=ascending, + kind='mergesort') def nlargest(self, n, columns, take_last=False): """Get the rows of a DataFrame sorted by the `n` largest diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2fc288de438b3..1cd7046fa678e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -922,7 +922,7 @@ def to_hdf(self, path_or_buf, key, **kwargs): in the store wherever possible 
fletcher32 : bool, default False If applying compression use the fletcher32 checksum - dropna : boolean, default False. + dropna : boolean, default False. If true, ALL nan rows will not be written to store. """ @@ -1683,25 +1683,74 @@ def add_suffix(self, suffix): new_data = self._data.add_suffix(suffix) return self._constructor(new_data).__finalize__(self) - def sort_index(self, axis=0, ascending=True): + _shared_docs['sort_values'] = """ + Sort by the values along either axis + + .. versionadded:: 0.17.0 + + Parameters + ---------- + by : string name or list of names which refer to the axis items + axis : %(axes)s to direct sorting + ascending : bool or list of bool + Sort ascending vs. descending. Specify list for multiple sort orders. + If this is a list of bools, must match the length of the by + inplace : bool + if True, perform operation in-place + kind : {`quicksort`, `mergesort`, `heapsort`} + Choice of sorting algorithm. See also ndarray.np.sort for more information. + `mergesort` is the only stable algorithm. For DataFrames, this option is + only applied when sorting on a single column or label. + na_position : {'first', 'last'} + `first` puts NaNs at the beginning, `last` puts NaNs at the end + + Returns + ------- + sorted_obj : %(klass)s """ + def sort_values(self, by, axis=0, ascending=True, inplace=False, + kind='quicksort', na_position='last'): + raise AbstractMethodError(self) + + _shared_docs['sort_index'] = """ Sort object by labels (along an axis) Parameters ---------- - axis : {0, 1} - Sort index/rows versus columns + axis : %(axes)s to direct sorting + level : int or level name or list of ints or list of level names + if not None, sort on values in specified index level(s) ascending : boolean, default True Sort ascending vs. descending + inplace : bool + if True, perform operation in-place + kind : {`quicksort`, `mergesort`, `heapsort`} + Choice of sorting algorithm. See also ndarray.np.sort for more information. 
+ `mergesort` is the only stable algorithm. For DataFrames, this option is + only applied when sorting on a single column or label. + na_position : {'first', 'last'} + `first` puts NaNs at the beginning, `last` puts NaNs at the end + sort_remaining : bool + if true and sorting by level and index is multilevel, sort by other levels + too (in order) after sorting by specified level Returns ------- - sorted_obj : type of caller + sorted_obj : %(klass)s """ + + @Appender(_shared_docs['sort_index'] % dict(axes="axes", klass="NDFrame")) + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, + kind='quicksort', na_position='last', sort_remaining=True): axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) labels = self._get_axis(axis) + if level is not None: + raise NotImplementedError("level is not implemented") + if inplace: + raise NotImplementedError("inplace is not implemented") + sort_index = labels.argsort() if not ascending: sort_index = sort_index[::-1] @@ -2910,7 +2959,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, use the actual numerical values of the index * 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all wrappers around the scipy interpolation methods of similar - names. These use the actual numerical values of the index. See + names. These use the actual numerical values of the index. 
See the scipy documentation for more on their behavior: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html diff --git a/pandas/core/index.py b/pandas/core/index.py index 12ad8a590c304..9f7221df149bc 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1332,7 +1332,7 @@ def asof_locs(self, where, mask): return result - def order(self, return_indexer=False, ascending=True): + def sort_values(self, return_indexer=False, ascending=True): """ Return sorted copy of Index """ @@ -1347,8 +1347,38 @@ def order(self, return_indexer=False, ascending=True): else: return sorted_index + def order(self, return_indexer=False, ascending=True): + """ + Return sorted copy of Index + + DEPRECATED: use :meth:`Index.sort_values` + """ + warnings.warn("order is deprecated, use sort_values(...)", + FutureWarning, stacklevel=2) + return self.sort_values(return_indexer=return_indexer, ascending=ascending) + def sort(self, *args, **kwargs): - raise TypeError('Cannot sort an %r object' % self.__class__.__name__) + raise TypeError("cannot sort an Index object in-place, use sort_values instead") + + def sortlevel(self, level=None, ascending=True, sort_remaining=None): + """ + + For internal compatibility with with the Index API + + Sort the Index. This is for compat with MultiIndex + + Parameters + ---------- + ascending : boolean, default True + False to sort in descending order + + level, sort_remaining are compat paramaters + + Returns + ------- + sorted_index : Index + """ + return self.sort_values(return_indexer=True, ascending=ascending) def shift(self, periods=1, freq=None): """ @@ -4864,6 +4894,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): If list-like must be names or ints of levels. 
ascending : boolean, default True False to sort in descending order + Can also be a list to specify a directed ordering sort_remaining : sort by the remaining levels after level. Returns @@ -4872,30 +4903,41 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ from pandas.core.groupby import _indexer_from_factorized - labels = list(self.labels) - shape = list(self.levshape) - if isinstance(level, (compat.string_types, int)): level = [level] level = [self._get_level_number(lev) for lev in level] + sortorder = None + + # we have a directed ordering via ascending + if isinstance(ascending, list): + if not len(level) == len(ascending): + raise ValueError("level must have same length as ascending") - # partition labels and shape - primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level)) - primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level)) + from pandas.core.groupby import _lexsort_indexer + indexer = _lexsort_indexer(self.labels, orders=ascending) - if sort_remaining: - primary += primary + tuple(labels) - primshp += primshp + tuple(shape) - sortorder = None + # level ordering else: - sortorder = level[0] - indexer = _indexer_from_factorized(primary, - primshp, - compress=False) + labels = list(self.labels) + shape = list(self.levshape) - if not ascending: - indexer = indexer[::-1] + # partition labels and shape + primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level)) + primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level)) + + if sort_remaining: + primary += primary + tuple(labels) + primshp += primshp + tuple(shape) + else: + sortorder = level[0] + + indexer = _indexer_from_factorized(primary, + primshp, + compress=False) + + if not ascending: + indexer = indexer[::-1] indexer = com._ensure_platform_int(indexer) new_labels = [lab.take(indexer) for lab in self.labels] diff --git a/pandas/core/series.py b/pandas/core/series.py index 87fde996aaa67..d5a7f770850bf 100644 --- a/pandas/core/series.py 
+++ b/pandas/core/series.py @@ -1423,8 +1423,7 @@ def searchsorted(self, v, side='left', sorter=None): See Also -------- - Series.sort - Series.order + Series.sort_values numpy.searchsorted Notes @@ -1602,38 +1601,150 @@ def update(self, other): #---------------------------------------------------------------------- # Reindexing, sorting - def sort_index(self, ascending=True): - """ - Sort object by labels (along an axis) + @Appender(generic._shared_docs['sort_values'] % _shared_doc_kwargs) + def sort_values(self, axis=0, ascending=True, inplace=False, + kind='quicksort', na_position='last'): - Parameters - ---------- - ascending : boolean or list, default True - Sort ascending vs. descending. Specify list for multiple sort - orders + axis = self._get_axis_number(axis) - Examples - -------- - >>> result1 = s.sort_index(ascending=False) - >>> result2 = s.sort_index(ascending=[1, 0]) + # GH 5856/5853 + if inplace and self._is_cached: + raise ValueError("This Series is a view of some other array, to " + "sort in-place you must create a copy") - Returns - ------- - sorted_obj : Series - """ + def _try_kind_sort(arr): + # easier to ask forgiveness than permission + try: + # if kind==mergesort, it can fail for object dtype + return arr.argsort(kind=kind) + except TypeError: + # stable sort not available for object dtype + # uses the argsort default quicksort + return arr.argsort(kind='quicksort') + + arr = self.values + sortedIdx = np.empty(len(self), dtype=np.int32) + + bad = isnull(arr) + + good = ~bad + idx = np.arange(len(self)) + + argsorted = _try_kind_sort(arr[good]) + + if not ascending: + argsorted = argsorted[::-1] + + if na_position == 'last': + n = good.sum() + sortedIdx[:n] = idx[good][argsorted] + sortedIdx[n:] = idx[bad] + elif na_position == 'first': + n = bad.sum() + sortedIdx[n:] = idx[good][argsorted] + sortedIdx[:n] = idx[bad] + else: + raise ValueError('invalid na_position: {!r}'.format(na_position)) + + result = self._constructor(arr[sortedIdx], 
index=self.index[sortedIdx]) + + if inplace: + self._update_inplace(result) + else: + return result.__finalize__(self) + + @Appender(generic._shared_docs['sort_index'] % _shared_doc_kwargs) + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, + sort_remaining=True): + + axis = self._get_axis_number(axis) index = self.index - if isinstance(index, MultiIndex): + if level is not None: + new_index, indexer = index.sortlevel(level, ascending=ascending, + sort_remaining=sort_remaining) + elif isinstance(index, MultiIndex): from pandas.core.groupby import _lexsort_indexer indexer = _lexsort_indexer(index.labels, orders=ascending) indexer = com._ensure_platform_int(indexer) - new_labels = index.take(indexer) + new_index = index.take(indexer) else: - new_labels, indexer = index.order(return_indexer=True, - ascending=ascending) + new_index, indexer = index.sort_values(return_indexer=True, + ascending=ascending) new_values = self.values.take(indexer) return self._constructor(new_values, - index=new_labels).__finalize__(self) + index=new_index).__finalize__(self) + + def sort(self, axis=0, ascending=True, kind='quicksort', na_position='last', inplace=True): + """ + DEPRECATED: use :meth:`Series.sort_values(inplace=True)` for INPLACE sorting + + Sort values and index labels by value. This is an inplace sort by default. + Series.order is the equivalent but returns a new Series. + + Parameters + ---------- + axis : int (can only be zero) + ascending : boolean, default True + Sort ascending. Passing False sorts descending + kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See np.sort for more + information. 'mergesort' is the only stable algorithm + na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + inplace : boolean, default True + Do operation in place. 
+ + See Also + -------- + Series.sort_values + """ + warnings.warn("sort is deprecated, use sort_values(inplace=True) for for INPLACE sorting", + FutureWarning, stacklevel=2) + + return self.sort_values(ascending=ascending, + kind=kind, + na_position=na_position, + inplace=inplace) + + def order(self, na_last=None, ascending=True, kind='quicksort', na_position='last', inplace=False): + """ + DEPRECATED: use :meth:`Series.sort_values` + + Sorts Series object, by value, maintaining index-value link. + This will return a new Series by default. Series.sort is the equivalent but as an inplace method. + + Parameters + ---------- + na_last : boolean (optional, default=True) (DEPRECATED; use na_position) + Put NaN's at beginning or end + ascending : boolean, default True + Sort ascending. Passing False sorts descending + kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See np.sort for more + information. 'mergesort' is the only stable algorithm + na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + inplace : boolean, default False + Do operation in place. + + Returns + ------- + y : Series + + See Also + -------- + Series.sort_values + """ + warnings.warn("order is deprecated, use sort_values(...)", + FutureWarning, stacklevel=2) + + return self.sort_values(ascending=ascending, + kind=kind, + na_position=na_position, + inplace=inplace) def argsort(self, axis=0, kind='quicksort', order=None): """ @@ -1701,114 +1812,6 @@ def rank(self, method='average', na_option='keep', ascending=True, ascending=ascending, pct=pct) return self._constructor(ranks, index=self.index).__finalize__(self) - def sort(self, axis=0, ascending=True, kind='quicksort', na_position='last', inplace=True): - """ - Sort values and index labels by value. This is an inplace sort by default. - Series.order is the equivalent but returns a new Series. 
- - Parameters - ---------- - axis : int (can only be zero) - ascending : boolean, default True - Sort ascending. Passing False sorts descending - kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See np.sort for more - information. 'mergesort' is the only stable algorithm - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - inplace : boolean, default True - Do operation in place. - - See Also - -------- - Series.order - """ - return self.order(ascending=ascending, - kind=kind, - na_position=na_position, - inplace=inplace) - - def order(self, na_last=None, ascending=True, kind='quicksort', na_position='last', inplace=False): - """ - Sorts Series object, by value, maintaining index-value link. - This will return a new Series by default. Series.sort is the equivalent but as an inplace method. - - Parameters - ---------- - na_last : boolean (optional, default=True) (DEPRECATED; use na_position) - Put NaN's at beginning or end - ascending : boolean, default True - Sort ascending. Passing False sorts descending - kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' - Choice of sorting algorithm. See np.sort for more - information. 'mergesort' is the only stable algorithm - na_position : {'first', 'last'} (optional, default='last') - 'first' puts NaNs at the beginning - 'last' puts NaNs at the end - inplace : boolean, default False - Do operation in place. - - Returns - ------- - y : Series - - See Also - -------- - Series.sort - """ - - # GH 5856/5853 - if inplace and self._is_cached: - raise ValueError("This Series is a view of some other array, to " - "sort in-place you must create a copy") - - if na_last is not None: - warnings.warn(("na_last is deprecated. 
Please use na_position instead"), - FutureWarning) - na_position = 'last' if na_last else 'first' - - def _try_kind_sort(arr): - # easier to ask forgiveness than permission - try: - # if kind==mergesort, it can fail for object dtype - return arr.argsort(kind=kind) - except TypeError: - # stable sort not available for object dtype - # uses the argsort default quicksort - return arr.argsort(kind='quicksort') - - arr = self.values - sortedIdx = np.empty(len(self), dtype=np.int32) - - bad = isnull(arr) - - good = ~bad - idx = np.arange(len(self)) - - argsorted = _try_kind_sort(arr[good]) - - if not ascending: - argsorted = argsorted[::-1] - - if na_position == 'last': - n = good.sum() - sortedIdx[:n] = idx[good][argsorted] - sortedIdx[n:] = idx[bad] - elif na_position == 'first': - n = bad.sum() - sortedIdx[n:] = idx[good][argsorted] - sortedIdx[:n] = idx[bad] - else: - raise ValueError('invalid na_position: {!r}'.format(na_position)) - - result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx]) - - if inplace: - self._update_inplace(result) - else: - return result.__finalize__(self) - def nlargest(self, n=5, take_last=False): """Return the largest `n` elements. @@ -1826,7 +1829,7 @@ def nlargest(self, n=5, take_last=False): Notes ----- - Faster than ``.order(ascending=False).head(n)`` for small `n` relative + Faster than ``.sort_values(ascending=False).head(n)`` for small `n` relative to the size of the ``Series`` object. See Also @@ -1859,7 +1862,7 @@ def nsmallest(self, n=5, take_last=False): Notes ----- - Faster than ``.order().head(n)`` for small `n` relative to + Faster than ``.sort_values().head(n)`` for small `n` relative to the size of the ``Series`` object. 
See Also @@ -1889,15 +1892,13 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): Returns ------- sorted : Series - """ - if not isinstance(self.index, MultiIndex): - raise TypeError('can only sort by level with a hierarchical index') - new_index, indexer = self.index.sortlevel(level, ascending=ascending, - sort_remaining=sort_remaining) - new_values = self.values.take(indexer) - return self._constructor(new_values, - index=new_index).__finalize__(self) + See Also + -------- + Series.sort_index(level=...) + + """ + return self.sort_index(level=level, ascending=ascending, sort_remaining=sort_remaining) def swaplevel(self, i, j, copy=True): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2c9ffe6b74536..8ef6363f836ae 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3973,7 +3973,7 @@ def delete(self, where=None, start=None, stop=None, **kwargs): values = self.selection.select_coords() # delete the rows in reverse order - l = Series(values).order() + l = Series(values).sort_values() ln = len(l) if ln: diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 66c2bbde0b3f8..c577286ceca9a 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -137,9 +137,9 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_dtype=True, raise_ok=None, sort=None): if sort is not None: - df = df.sort(sort) + df = df.sort_values(sort) else: - df = df.sort() + df = df.sort_index() # if we are not unique, then check that we are raising ValueError # for the appropriate orients @@ -162,9 +162,9 @@ def _check_orient(df, orient, dtype=None, numpy=False, raise if sort is not None and sort in unser.columns: - unser = unser.sort(sort) + unser = unser.sort_values(sort) else: - unser = unser.sort() + unser = unser.sort_index() if dtype is False: check_dtype=False @@ -188,7 +188,7 @@ def _check_orient(df, orient, dtype=None, 
numpy=False, unser.columns = [str(i) for i in unser.columns] if sort is None: - unser = unser.sort() + unser = unser.sort_index() assert_almost_equal(df.values, unser.values) else: if convert_axes: @@ -752,4 +752,4 @@ def my_handler_raises(obj): if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', - '--pdb-failure', '-s'], exit=False) \ No newline at end of file + '--pdb-failure', '-s'], exit=False) diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 43e1c5c89dd5e..974d06fb68137 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -1246,16 +1246,13 @@ def testDataFrameNumpyLabelled(self): tm.assert_numpy_array_equal(df.index, outp.index) def testSeries(self): - s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]) - s.sort() + s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]).sort_values() # column indexed - outp = Series(ujson.decode(ujson.encode(s))) - outp.sort() + outp = Series(ujson.decode(ujson.encode(s))).sort_values() self.assertTrue((s == outp).values.all()) - outp = Series(ujson.decode(ujson.encode(s), numpy=True)) - outp.sort() + outp = Series(ujson.decode(ujson.encode(s), numpy=True)).sort_values() self.assertTrue((s == outp).values.all()) dec = _clean_dict(ujson.decode(ujson.encode(s, orient="split"))) @@ -1281,17 +1278,14 @@ def testSeries(self): outp = Series(ujson.decode(ujson.encode(s, orient="values"))) self.assertTrue((s == outp).values.all()) - outp = Series(ujson.decode(ujson.encode(s, orient="index"))) - outp.sort() + outp = Series(ujson.decode(ujson.encode(s, orient="index"))).sort_values() self.assertTrue((s == outp).values.all()) - outp = Series(ujson.decode(ujson.encode(s, orient="index"), numpy=True)) - outp.sort() + outp = Series(ujson.decode(ujson.encode(s, orient="index"), numpy=True)).sort_values() self.assertTrue((s == outp).values.all()) def 
testSeriesNested(self): - s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]) - s.sort() + s = Series([10, 20, 30, 40, 50, 60], name="series", index=[6,7,8,9,10,15]).sort_values() nested = {'s1': s, 's2': s.copy()} diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 5b934bad38bd3..a9e93f909406b 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -906,8 +906,8 @@ def test_categorical_sorting(self): parsed_115 = read_stata(self.dta20_115) parsed_117 = read_stata(self.dta20_117) # Sort based on codes, not strings - parsed_115 = parsed_115.sort("srh") - parsed_117 = parsed_117.sort("srh") + parsed_115 = parsed_115.sort_values("srh") + parsed_117 = parsed_117.sort_values("srh") # Don't sort index parsed_115.index = np.arange(parsed_115.shape[0]) parsed_117.index = np.arange(parsed_117.shape[0]) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index c9e4285d8b684..39eb72c0c3aa2 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -510,10 +510,8 @@ def test_value_counts_inferred(self): self.assert_numpy_array_equal(s.unique(), np.unique(s_values)) self.assertEqual(s.nunique(), 4) # don't sort, have to sort after the fact as not sorting is platform-dep - hist = s.value_counts(sort=False) - hist.sort() - expected = Series([3, 1, 4, 2], index=list('acbd')) - expected.sort() + hist = s.value_counts(sort=False).sort_values() + expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values() tm.assert_series_equal(hist, expected) # sort ascending @@ -653,7 +651,7 @@ def test_factorize(self): # sort by value, and create duplicates if isinstance(o, Series): - o.sort() + o = o.sort_values() n = o.iloc[5:].append(o) else: indexer = o.argsort() diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 680b370cbca41..19713984c9d7a 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1045,22 +1045,22 
@@ def test_sort(self): # unordered cats are sortable cat = Categorical(["a","b","b","a"], ordered=False) - cat.order() + cat.sort_values() cat.sort() cat = Categorical(["a","c","b","d"], ordered=True) - # order - res = cat.order() + # sort_values + res = cat.sort_values() exp = np.array(["a","b","c","d"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp) cat = Categorical(["a","c","b","d"], categories=["a","b","c","d"], ordered=True) - res = cat.order() + res = cat.sort_values() exp = np.array(["a","b","c","d"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp) - res = cat.order(ascending=False) + res = cat.sort_values(ascending=False) exp = np.array(["d","c","b","a"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp) @@ -1249,7 +1249,7 @@ def setUp(self): df = DataFrame({'value': np.random.randint(0, 10000, 100)}) labels = [ "{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500) ] - df = df.sort(columns=['value'], ascending=True) + df = df.sort_values(by=['value'], ascending=True) df['value_group'] = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) self.cat = df @@ -1665,7 +1665,7 @@ def test_assignment_to_dataframe(self): df = DataFrame({'value': np.array(np.random.randint(0, 10000, 100),dtype='int32')}) labels = [ "{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500) ] - df = df.sort(columns=['value'], ascending=True) + df = df.sort_values(by=['value'], ascending=True) s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) d = s.values df['D'] = d @@ -2548,25 +2548,29 @@ def test_count(self): def test_sort(self): - cat = Series(Categorical(["a","b","b","a"], ordered=False)) + c = Categorical(["a","b","b","a"], ordered=False) + cat = Series(c) + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + c.order() # sort in the categories order expected = Series(Categorical(["a","a","b","b"], ordered=False),index=[0,3,1,2]) - result = cat.order() + result = 
cat.sort_values() tm.assert_series_equal(result, expected) cat = Series(Categorical(["a","c","b","d"], ordered=True)) - - res = cat.order() + res = cat.sort_values() exp = np.array(["a","b","c","d"]) self.assert_numpy_array_equal(res.__array__(), exp) cat = Series(Categorical(["a","c","b","d"], categories=["a","b","c","d"], ordered=True)) - res = cat.order() + res = cat.sort_values() exp = np.array(["a","b","c","d"]) self.assert_numpy_array_equal(res.__array__(), exp) - res = cat.order(ascending=False) + res = cat.sort_values(ascending=False) exp = np.array(["d","c","b","a"]) self.assert_numpy_array_equal(res.__array__(), exp) @@ -2576,19 +2580,19 @@ def test_sort(self): df = DataFrame({"unsort":raw_cat1,"sort":raw_cat2, "string":s, "values":[1,2,3,4]}) # Cats must be sorted in a dataframe - res = df.sort(columns=["string"], ascending=False) + res = df.sort_values(by=["string"], ascending=False) exp = np.array(["d", "c", "b", "a"]) self.assert_numpy_array_equal(res["sort"].values.__array__(), exp) self.assertEqual(res["sort"].dtype, "category") - res = df.sort(columns=["sort"], ascending=False) - exp = df.sort(columns=["string"], ascending=True) + res = df.sort_values(by=["sort"], ascending=False) + exp = df.sort_values(by=["string"], ascending=True) self.assert_numpy_array_equal(res["values"], exp["values"]) self.assertEqual(res["sort"].dtype, "category") self.assertEqual(res["unsort"].dtype, "category") # unordered cat, but we allow this - df.sort(columns=["unsort"], ascending=False) + df.sort_values(by=["unsort"], ascending=False) # multi-columns sort # GH 7848 @@ -2597,18 +2601,18 @@ def test_sort(self): df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a']) # sorts 'grade' according to the order of the categories - result = df.sort(columns=['grade']) + result = df.sort_values(by=['grade']) expected = df.iloc[[1,2,5,0,3,4]] tm.assert_frame_equal(result,expected) # multi - result = df.sort(columns=['grade', 'id']) + result = df.sort_values(by=['grade', 
'id']) expected = df.iloc[[2,1,5,4,3,0]] tm.assert_frame_equal(result,expected) # reverse cat = Categorical(["a","c","c","b","d"], ordered=True) - res = cat.order(ascending=False) + res = cat.sort_values(ascending=False) exp_val = np.array(["d","c", "c", "b","a"],dtype=object) exp_categories = np.array(["a","b","c","d"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) @@ -2617,28 +2621,28 @@ def test_sort(self): # some NaN positions cat = Categorical(["a","c","b","d", np.nan], ordered=True) - res = cat.order(ascending=False, na_position='last') + res = cat.sort_values(ascending=False, na_position='last') exp_val = np.array(["d","c","b","a", np.nan],dtype=object) exp_categories = np.array(["a","b","c","d"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) cat = Categorical(["a","c","b","d", np.nan], ordered=True) - res = cat.order(ascending=False, na_position='first') + res = cat.sort_values(ascending=False, na_position='first') exp_val = np.array([np.nan, "d","c","b","a"],dtype=object) exp_categories = np.array(["a","b","c","d"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) cat = Categorical(["a","c","b","d", np.nan], ordered=True) - res = cat.order(ascending=False, na_position='first') + res = cat.sort_values(ascending=False, na_position='first') exp_val = np.array([np.nan, "d","c","b","a"],dtype=object) exp_categories = np.array(["a","b","c","d"],dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) cat = Categorical(["a","c","b","d", np.nan], ordered=True) - res = cat.order(ascending=False, na_position='last') + res = cat.sort_values(ascending=False, na_position='last') exp_val = np.array(["d","c","b","a",np.nan],dtype=object) exp_categories = np.array(["a","b","c","d"],dtype=object) 
self.assert_numpy_array_equal(res.__array__(), exp_val) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 8c836ae564e28..95426e8648a10 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -670,8 +670,8 @@ def test_setitem_cast(self): df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC')) df['event'] = np.nan df.loc[10,'event'] = 'foo' - result = df.get_dtype_counts().order() - expected = Series({'float64' : 3, 'object' : 1 }).order() + result = df.get_dtype_counts().sort_values() + expected = Series({'float64' : 3, 'object' : 1 }).sort_values() assert_series_equal(result, expected) def test_setitem_boolean_column(self): @@ -1096,8 +1096,7 @@ def test_setitem_fancy_mixed_2d(self): assert_frame_equal(df, expected) def test_ix_align(self): - b = Series(randn(10), name=0) - b.sort() + b = Series(randn(10), name=0).sort_values() df_orig = DataFrame(randn(10, 4)) df = df_orig.copy() @@ -4158,25 +4157,19 @@ def test_timedeltas(self): df = DataFrame(dict(A = Series(date_range('2012-1-1', periods=3, freq='D')), B = Series([ timedelta(days=i) for i in range(3) ]))) - result = df.get_dtype_counts() - expected = Series({'datetime64[ns]': 1, 'timedelta64[ns]' : 1 }) - result.sort() - expected.sort() + result = df.get_dtype_counts().sort_values() + expected = Series({'datetime64[ns]': 1, 'timedelta64[ns]' : 1 }).sort_values() assert_series_equal(result, expected) df['C'] = df['A'] + df['B'] - expected = Series({'datetime64[ns]': 2, 'timedelta64[ns]' : 1 }) - result = df.get_dtype_counts() - result.sort() - expected.sort() + expected = Series({'datetime64[ns]': 2, 'timedelta64[ns]' : 1 }).sort_values() + result = df.get_dtype_counts().sort_values() assert_series_equal(result, expected) # mixed int types df['D'] = 1 - expected = Series({'datetime64[ns]': 2, 'timedelta64[ns]' : 1, 'int64' : 1 }) - result = df.get_dtype_counts() - result.sort() - expected.sort() + expected = Series({'datetime64[ns]': 2, 'timedelta64[ns]' : 1, 
'int64' : 1 }).sort_values() + result = df.get_dtype_counts().sort_values() assert_series_equal(result, expected) def test_operators_timedelta64(self): @@ -7464,10 +7457,11 @@ def test_as_matrix_duplicates(self): def test_ftypes(self): frame = self.mixed_float - expected = Series(dict(A = 'float32:dense', B = 'float32:dense', C = 'float16:dense', D = 'float64:dense')) - expected.sort() - result = frame.ftypes - result.sort() + expected = Series(dict(A = 'float32:dense', + B = 'float32:dense', + C = 'float16:dense', + D = 'float64:dense')).sort_values() + result = frame.ftypes.sort_values() assert_series_equal(result,expected) def test_values(self): @@ -8389,13 +8383,13 @@ def test_fillna(self): def test_fillna_dtype_conversion(self): # make sure that fillna on an empty frame works df = DataFrame(index=["A","B","C"], columns = [1,2,3,4,5]) - result = df.get_dtype_counts().order() + result = df.get_dtype_counts().sort_values() expected = Series({ 'object' : 5 }) assert_series_equal(result, expected) result = df.fillna(1) expected = DataFrame(1, index=["A","B","C"], columns = [1,2,3,4,5]) - result = result.get_dtype_counts().order() + result = result.get_dtype_counts().sort_values() expected = Series({ 'int64' : 5 }) assert_series_equal(result, expected) @@ -11197,13 +11191,60 @@ def test_reorder_levels(self): result = df.reorder_levels(['L0', 'L0', 'L0']) assert_frame_equal(result, expected) + def test_sort_values(self): + + # API for 9816 + + # sort_index + frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], + columns=['A', 'B', 'C', 'D']) + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + frame.sort(columns='A') + with tm.assert_produces_warning(FutureWarning): + frame.sort() + + unordered = frame.ix[[3, 2, 4, 1]] + expected = unordered.sort_index() + + result = unordered.sort_index(axis=0) + assert_frame_equal(result, expected) + + unordered = frame.ix[:, [2, 1, 3, 0]] + expected = unordered.sort_index(axis=1) + + result = 
unordered.sort_index(axis=1) + assert_frame_equal(result, expected) + assert_frame_equal(result, expected) + + # sortlevel + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) + + result = df.sort_index(level='A', sort_remaining=False) + expected = df.sortlevel('A', sort_remaining=False) + assert_frame_equal(result, expected) + + df = df.T + result = df.sort_index(level='A', axis=1, sort_remaining=False) + expected = df.sortlevel('A', axis=1, sort_remaining=False) + assert_frame_equal(result, expected) + + # MI sort, but no by + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4]], mi) + result = df.sort_index(sort_remaining=False) + expected = df.sort_index() + assert_frame_equal(result, expected) + def test_sort_index(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) # axis=0 unordered = frame.ix[[3, 2, 4, 1]] - sorted_df = unordered.sort_index() + sorted_df = unordered.sort_index(axis=0) expected = frame assert_frame_equal(sorted_df, expected) @@ -11222,46 +11263,42 @@ def test_sort_index(self): assert_frame_equal(sorted_df, expected) # by column - sorted_df = frame.sort_index(by='A') + sorted_df = frame.sort_values(by='A') indexer = frame['A'].argsort().values expected = frame.ix[frame.index[indexer]] assert_frame_equal(sorted_df, expected) - sorted_df = frame.sort_index(by='A', ascending=False) + sorted_df = frame.sort_values(by='A', ascending=False) indexer = indexer[::-1] expected = frame.ix[frame.index[indexer]] assert_frame_equal(sorted_df, expected) - sorted_df = frame.sort(columns='A', ascending=False) + sorted_df = frame.sort_values(by='A', ascending=False) assert_frame_equal(sorted_df, expected) # GH4839 - sorted_df = frame.sort(columns=['A'], ascending=[False]) + sorted_df = frame.sort_values(by=['A'], ascending=[False]) assert_frame_equal(sorted_df, expected) # check for now - sorted_df = 
frame.sort(columns='A') + sorted_df = frame.sort_values(by='A') assert_frame_equal(sorted_df, expected[::-1]) - expected = frame.sort_index(by='A') + expected = frame.sort_values(by='A') assert_frame_equal(sorted_df, expected) - - sorted_df = frame.sort(columns=['A', 'B'], ascending=False) - expected = frame.sort_index(by=['A', 'B'], ascending=False) - assert_frame_equal(sorted_df, expected) - - sorted_df = frame.sort(columns=['A', 'B']) + expected = frame.sort_values(by=['A', 'B'], ascending=False) + sorted_df = frame.sort_values(by=['A', 'B']) assert_frame_equal(sorted_df, expected[::-1]) - self.assertRaises(ValueError, frame.sort_index, axis=2, inplace=True) + self.assertRaises(ValueError, lambda : frame.sort_values(by=['A','B'], axis=2, inplace=True)) msg = 'When sorting by column, axis must be 0' with assertRaisesRegexp(ValueError, msg): - frame.sort_index(by='A', axis=1) + frame.sort_values(by='A', axis=1) msg = r'Length of ascending \(5\) != length of by \(2\)' with assertRaisesRegexp(ValueError, msg): - frame.sort_index(by=['A', 'B'], axis=0, ascending=[True] * 5) + frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5) def test_sort_index_categorical_index(self): @@ -11287,14 +11324,14 @@ def test_sort_nan(self): {'A': [nan, 1, 1, 2, 4, 6, 8], 'B': [5, 9, 2, nan, 5, 5, 4]}, index=[2, 0, 3, 1, 6, 4, 5]) - sorted_df = df.sort(['A'], na_position='first') + sorted_df = df.sort_values(['A'], na_position='first') assert_frame_equal(sorted_df, expected) expected = DataFrame( {'A': [nan, 8, 6, 4, 2, 1, 1], 'B': [5, 4, 5, 5, nan, 9, 2]}, index=[2, 5, 4, 6, 1, 0, 3]) - sorted_df = df.sort(['A'], na_position='first', ascending=False) + sorted_df = df.sort_values(['A'], na_position='first', ascending=False) assert_frame_equal(sorted_df, expected) # na_position='last', order @@ -11302,7 +11339,7 @@ def test_sort_nan(self): {'A': [1, 1, 2, 4, 6, 8, nan], 'B': [2, 9, nan, 5, 5, 4, 5]}, index=[3, 0, 1, 6, 4, 5, 2]) - sorted_df = df.sort(['A','B']) + sorted_df = 
df.sort_values(['A','B']) assert_frame_equal(sorted_df, expected) # na_position='first', order @@ -11310,7 +11347,7 @@ def test_sort_nan(self): {'A': [nan, 1, 1, 2, 4, 6, 8], 'B': [5, 2, 9, nan, 5, 5, 4]}, index=[2, 3, 0, 1, 6, 4, 5]) - sorted_df = df.sort(['A','B'], na_position='first') + sorted_df = df.sort_values(['A','B'], na_position='first') assert_frame_equal(sorted_df, expected) # na_position='first', not order @@ -11318,7 +11355,7 @@ def test_sort_nan(self): {'A': [nan, 1, 1, 2, 4, 6, 8], 'B': [5, 9, 2, nan, 5, 5, 4]}, index=[2, 0, 3, 1, 6, 4, 5]) - sorted_df = df.sort(['A','B'], ascending=[1,0], na_position='first') + sorted_df = df.sort_values(['A','B'], ascending=[1,0], na_position='first') assert_frame_equal(sorted_df, expected) # na_position='last', not order @@ -11326,7 +11363,7 @@ def test_sort_nan(self): {'A': [8, 6, 4, 2, 1, 1, nan], 'B': [4, 5, 5, nan, 2, 9, 5]}, index=[5, 4, 6, 1, 3, 0, 2]) - sorted_df = df.sort(['A','B'], ascending=[0,1], na_position='last') + sorted_df = df.sort_values(['A','B'], ascending=[0,1], na_position='last') assert_frame_equal(sorted_df, expected) # Test DataFrame with nan label @@ -11335,28 +11372,28 @@ def test_sort_nan(self): index = [1, 2, 3, 4, 5, 6, nan]) # NaN label, ascending=True, na_position='last' - sorted_df = df.sort(kind='quicksort', ascending=True, na_position='last') + sorted_df = df.sort_index(kind='quicksort', ascending=True, na_position='last') expected = DataFrame({'A': [1, 2, nan, 1, 6, 8, 4], 'B': [9, nan, 5, 2, 5, 4, 5]}, index = [1, 2, 3, 4, 5, 6, nan]) assert_frame_equal(sorted_df, expected) # NaN label, ascending=True, na_position='first' - sorted_df = df.sort(na_position='first') + sorted_df = df.sort_index(na_position='first') expected = DataFrame({'A': [4, 1, 2, nan, 1, 6, 8], 'B': [5, 9, nan, 5, 2, 5, 4]}, index = [nan, 1, 2, 3, 4, 5, 6]) assert_frame_equal(sorted_df, expected) # NaN label, ascending=False, na_position='last' - sorted_df = df.sort(kind='quicksort', ascending=False) + 
sorted_df = df.sort_index(kind='quicksort', ascending=False) expected = DataFrame({'A': [8, 6, 1, nan, 2, 1, 4], 'B': [4, 5, 2, 5, nan, 9, 5]}, index = [6, 5, 4, 3, 2, 1, nan]) assert_frame_equal(sorted_df, expected) # NaN label, ascending=False, na_position='first' - sorted_df = df.sort(kind='quicksort', ascending=False, na_position='first') + sorted_df = df.sort_index(kind='quicksort', ascending=False, na_position='first') expected = DataFrame({'A': [4, 8, 6, 1, nan, 2, 1], 'B': [5, 4, 5, 2, 5, nan, 9]}, index = [nan, 6, 5, 4, 3, 2, 1]) @@ -11366,8 +11403,8 @@ def test_stable_descending_sort(self): # GH #6399 df = DataFrame([[2, 'first'], [2, 'second'], [1, 'a'], [1, 'b']], columns=['sort_col', 'order']) - sorted_df = df.sort_index(by='sort_col', kind='mergesort', - ascending=False) + sorted_df = df.sort_values(by='sort_col', kind='mergesort', + ascending=False) assert_frame_equal(df, sorted_df) def test_stable_descending_multicolumn_sort(self): @@ -11379,16 +11416,16 @@ def test_stable_descending_multicolumn_sort(self): {'A': [nan, 8, 6, 4, 2, 1, 1], 'B': [5, 4, 5, 5, nan, 2, 9]}, index=[2, 5, 4, 6, 1, 3, 0]) - sorted_df = df.sort(['A','B'], ascending=[0,1], na_position='first', - kind='mergesort') + sorted_df = df.sort_values(['A','B'], ascending=[0,1], na_position='first', + kind='mergesort') assert_frame_equal(sorted_df, expected) expected = DataFrame( {'A': [nan, 8, 6, 4, 2, 1, 1], 'B': [5, 4, 5, 5, nan, 9, 2]}, index=[2, 5, 4, 6, 1, 0, 3]) - sorted_df = df.sort(['A','B'], ascending=[0,0], na_position='first', - kind='mergesort') + sorted_df = df.sort_values(['A','B'], ascending=[0,0], na_position='first', + kind='mergesort') assert_frame_equal(sorted_df, expected) def test_sort_index_multicolumn(self): @@ -11400,18 +11437,27 @@ def test_sort_index_multicolumn(self): frame = DataFrame({'A': A, 'B': B, 'C': np.random.randn(100)}) - result = frame.sort_index(by=['A', 'B']) + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + 
frame.sort_index(by=['A', 'B']) + result = frame.sort_values(by=['A', 'B']) indexer = np.lexsort((frame['B'], frame['A'])) expected = frame.take(indexer) assert_frame_equal(result, expected) - result = frame.sort_index(by=['A', 'B'], ascending=False) + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + frame.sort_index(by=['A', 'B'], ascending=False) + result = frame.sort_values(by=['A', 'B'], ascending=False) indexer = np.lexsort((frame['B'].rank(ascending=False), frame['A'].rank(ascending=False))) expected = frame.take(indexer) assert_frame_equal(result, expected) - result = frame.sort_index(by=['B', 'A']) + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + frame.sort_index(by=['B', 'A']) + result = frame.sort_values(by=['B', 'A']) indexer = np.lexsort((frame['A'], frame['B'])) expected = frame.take(indexer) assert_frame_equal(result, expected) @@ -11458,7 +11504,10 @@ def test_sort_index_different_sortorder(self): df = DataFrame({'A': A, 'B': B, 'C': np.random.randn(100)}) - result = df.sort_index(by=['A', 'B'], ascending=[1, 0]) + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + df.sort_index(by=['A', 'B'], ascending=[1, 0]) + result = df.sort_values(by=['A', 'B'], ascending=[1, 0]) ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) @@ -11480,41 +11529,70 @@ def test_sort_inplace(self): columns=['A', 'B', 'C', 'D']) sorted_df = frame.copy() - sorted_df.sort(columns='A', inplace=True) - expected = frame.sort_index(by='A') + sorted_df.sort_values(by='A', inplace=True) + expected = frame.sort_values(by='A') assert_frame_equal(sorted_df, expected) sorted_df = frame.copy() - sorted_df.sort(columns='A', ascending=False, inplace=True) - expected = frame.sort_index(by='A', ascending=False) + sorted_df.sort_values(by='A', ascending=False, inplace=True) + expected = frame.sort_values(by='A', ascending=False) assert_frame_equal(sorted_df, expected) sorted_df = 
frame.copy() - sorted_df.sort(columns=['A', 'B'], ascending=False, inplace=True) - expected = frame.sort_index(by=['A', 'B'], ascending=False) + sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True) + expected = frame.sort_values(by=['A', 'B'], ascending=False) assert_frame_equal(sorted_df, expected) def test_sort_index_duplicates(self): + + ### with 9816, these are all translated to .sort_values + df = DataFrame([lrange(5,9), lrange(4)], columns=['a', 'a', 'b', 'b']) with assertRaisesRegexp(ValueError, 'duplicate'): - df.sort_index(by='a') + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + df.sort_index(by='a') + with assertRaisesRegexp(ValueError, 'duplicate'): + df.sort_values(by='a') + + with assertRaisesRegexp(ValueError, 'duplicate'): + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + df.sort_index(by=['a']) with assertRaisesRegexp(ValueError, 'duplicate'): - df.sort_index(by=['a']) + df.sort_values(by=['a']) + + with assertRaisesRegexp(ValueError, 'duplicate'): + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + # multi-column 'by' is separate codepath + df.sort_index(by=['a', 'b']) with assertRaisesRegexp(ValueError, 'duplicate'): # multi-column 'by' is separate codepath - df.sort_index(by=['a', 'b']) + df.sort_values(by=['a', 'b']) # with multi-index # GH4370 df = DataFrame(np.random.randn(4,2),columns=MultiIndex.from_tuples([('a',0),('a',1)])) with assertRaisesRegexp(ValueError, 'levels'): - df.sort_index(by='a') + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + df.sort_index(by='a') + with assertRaisesRegexp(ValueError, 'levels'): + df.sort_values(by='a') # convert tuples to a list of tuples - expected = df.sort_index(by=[('a',1)]) - result = df.sort_index(by=('a',1)) + # use .sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + df.sort_index(by=[('a',1)]) + expected = df.sort_values(by=[('a',1)]) + + # use 
.sort_values #9816 + with tm.assert_produces_warning(FutureWarning): + df.sort_index(by=('a',1)) + result = df.sort_values(by=('a',1)) assert_frame_equal(result, expected) def test_sortlevel(self): @@ -11540,21 +11618,21 @@ def test_sort_datetimes(self): df['C'] = 2. df['A1'] = 3. - df1 = df.sort(columns='A') - df2 = df.sort(columns=['A']) + df1 = df.sort_values(by='A') + df2 = df.sort_values(by=['A']) assert_frame_equal(df1,df2) - df1 = df.sort(columns='B') - df2 = df.sort(columns=['B']) + df1 = df.sort_values(by='B') + df2 = df.sort_values(by=['B']) assert_frame_equal(df1,df2) def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] with assertRaisesRegexp(ValueError, "This Series is a view"): - s.sort() + s.sort_values(inplace=True) cp = s.copy() - cp.sort() # it works! + cp.sort_values() # it works! def test_combine_first(self): # disjoint @@ -13632,7 +13710,7 @@ def test_construction_with_mixed(self): df = DataFrame(data) # check dtypes - result = df.get_dtype_counts().order() + result = df.get_dtype_counts().sort_values() expected = Series({ 'datetime64[ns]' : 3 }) # mixed-type frames @@ -13640,11 +13718,11 @@ def test_construction_with_mixed(self): self.mixed_frame['timedelta'] = timedelta(days=1,seconds=1) self.assertEqual(self.mixed_frame['datetime'].dtype, 'M8[ns]') self.assertEqual(self.mixed_frame['timedelta'].dtype, 'm8[ns]') - result = self.mixed_frame.get_dtype_counts().order() + result = self.mixed_frame.get_dtype_counts().sort_values() expected = Series({ 'float64' : 4, 'object' : 1, 'datetime64[ns]' : 1, - 'timedelta64[ns]' : 1}).order() + 'timedelta64[ns]' : 1}).sort_values() assert_series_equal(result,expected) def test_construction_with_conversions(self): @@ -14311,7 +14389,7 @@ def _check_f(base, f): _check_f(data.copy(), f) # sort - f = lambda x: x.sort('b', inplace=True) + f = lambda x: x.sort_values('b', inplace=True) _check_f(data.copy(), f) # sort_index @@ -14824,7 +14902,7 @@ def test_nlargest(self): df = 
pd.DataFrame({'a': np.random.permutation(10), 'b': list(ascii_lowercase[:10])}) result = df.nlargest(5, 'a') - expected = df.sort('a', ascending=False).head(5) + expected = df.sort_values('a', ascending=False).head(5) tm.assert_frame_equal(result, expected) def test_nlargest_multiple_columns(self): @@ -14833,7 +14911,7 @@ def test_nlargest_multiple_columns(self): 'b': list(ascii_lowercase[:10]), 'c': np.random.permutation(10).astype('float64')}) result = df.nlargest(5, ['a', 'b']) - expected = df.sort(['a', 'b'], ascending=False).head(5) + expected = df.sort_values(['a', 'b'], ascending=False).head(5) tm.assert_frame_equal(result, expected) def test_nsmallest(self): @@ -14841,7 +14919,7 @@ def test_nsmallest(self): df = pd.DataFrame({'a': np.random.permutation(10), 'b': list(ascii_lowercase[:10])}) result = df.nsmallest(5, 'a') - expected = df.sort('a').head(5) + expected = df.sort_values('a').head(5) tm.assert_frame_equal(result, expected) def test_nsmallest_multiple_columns(self): @@ -14850,7 +14928,7 @@ def test_nsmallest_multiple_columns(self): 'b': list(ascii_lowercase[:10]), 'c': np.random.permutation(10).astype('float64')}) result = df.nsmallest(5, ['a', 'c']) - expected = df.sort(['a', 'c']).head(5) + expected = df.sort_values(['a', 'c']).head(5) tm.assert_frame_equal(result, expected) def test_to_panel_expanddim(self): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index feb3c10a729ae..a306b2887571c 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -533,10 +533,8 @@ def max_value(group): return group.ix[group['value'].idxmax()] applied = df.groupby('A').apply(max_value) - result = applied.get_dtype_counts() - result.sort() - expected = Series({ 'object' : 2, 'float64' : 2, 'int64' : 1 }) - expected.sort() + result = applied.get_dtype_counts().sort_values() + expected = Series({ 'object' : 2, 'float64' : 2, 'int64' : 1 }).sort_values() assert_series_equal(result,expected) def 
test_groupby_return_type(self): @@ -2197,11 +2195,11 @@ def test_apply_frame_to_series(self): def test_apply_frame_concat_series(self): def trans(group): - return group.groupby('B')['C'].sum().order()[:2] + return group.groupby('B')['C'].sum().sort_values()[:2] def trans2(group): grouped = group.groupby(df.reindex(group.index)['B']) - return grouped.sum().order()[:2] + return grouped.sum().sort_values()[:2] df = DataFrame({'A': np.random.randint(0, 5, 1000), 'B': np.random.randint(0, 5, 1000), @@ -2223,7 +2221,7 @@ def test_apply_multikey_corner(self): lambda x: x.month]) def f(group): - return group.sort('A')[-5:] + return group.sort_values('A')[-5:] result = grouped.apply(f) for key, group in grouped: @@ -2284,7 +2282,7 @@ def test_apply_no_name_column_conflict(self): # it works! #2605 grouped = df.groupby(['name', 'name2']) - grouped.apply(lambda x: x.sort('value')) + grouped.apply(lambda x: x.sort_values('value')) def test_groupby_series_indexed_differently(self): s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7], @@ -3164,21 +3162,21 @@ def test_skip_group_keys(self): tsf = tm.makeTimeDataFrame() grouped = tsf.groupby(lambda x: x.month, group_keys=False) - result = grouped.apply(lambda x: x.sort_index(by='A')[:3]) + result = grouped.apply(lambda x: x.sort_values(by='A')[:3]) pieces = [] for key, group in grouped: - pieces.append(group.sort_index(by='A')[:3]) + pieces.append(group.sort_values(by='A')[:3]) expected = concat(pieces) assert_frame_equal(result, expected) grouped = tsf['A'].groupby(lambda x: x.month, group_keys=False) - result = grouped.apply(lambda x: x.order()[:3]) + result = grouped.apply(lambda x: x.sort_values()[:3]) pieces = [] for key, group in grouped: - pieces.append(group.order()[:3]) + pieces.append(group.sort_values()[:3]) expected = concat(pieces) assert_series_equal(result, expected, check_names=False) @@ -3924,7 +3922,7 @@ def test_groupby_with_timegrouper(self): ]}) # GH 6908 change target column's order - df_reordered = 
df_original.sort(columns='Quantity') + df_reordered = df_original.sort_values(by='Quantity') for df in [df_original, df_reordered]: df = df.set_index(['Date']) @@ -3962,7 +3960,7 @@ def test_groupby_with_timegrouper_methods(self): DT.datetime(2013,12,2,14,0), ]}) - df_sorted = df_original.sort(columns='Quantity', ascending=False) + df_sorted = df_original.sort_values(by='Quantity', ascending=False) for df in [df_original, df_sorted]: df = df.set_index('Date', drop=False) @@ -3995,7 +3993,7 @@ def test_timegrouper_with_reg_groups(self): DT.datetime(2013,12,2,14,0), ]}).set_index('Date') - df_sorted = df_original.sort(columns='Quantity', ascending=False) + df_sorted = df_original.sort_values(by='Quantity', ascending=False) for df in [df_original, df_sorted]: expected = DataFrame({ @@ -4037,7 +4035,7 @@ def test_timegrouper_with_reg_groups(self): DT.datetime(2013,10,2,14,0), ]}).set_index('Date') - df_sorted = df_original.sort(columns='Quantity', ascending=False) + df_sorted = df_original.sort_values(by='Quantity', ascending=False) for df in [df_original, df_sorted]: expected = DataFrame({ @@ -4146,7 +4144,7 @@ def test_timegrouper_get_group(self): 'Date' : [datetime(2013,9,1,13,0), datetime(2013,9,1,13,5), datetime(2013,10,1,20,0), datetime(2013,10,3,10,0), datetime(2013,12,2,12,0), datetime(2013,9,2,14,0),]}) - df_reordered = df_original.sort(columns='Quantity') + df_reordered = df_original.sort_values(by='Quantity') # single grouping expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], @@ -4174,7 +4172,7 @@ def test_timegrouper_get_group(self): # with index df_original = df_original.set_index('Date') - df_reordered = df_original.sort(columns='Quantity') + df_reordered = df_original.sort_values(by='Quantity') expected_list = [df_original.iloc[[0, 1, 5]], df_original.iloc[[2, 3]], df_original.iloc[[4]]] @@ -4369,7 +4367,7 @@ def test_filter_against_workaround(self): f = lambda x: x.mean() > 10 old_way = s[grouped.transform(f).astype('bool')] 
new_way = grouped.filter(f) - assert_series_equal(new_way.order(), old_way.order()) + assert_series_equal(new_way.sort_values(), old_way.sort_values()) # Series of floats s = 100*Series(np.random.random(1000)) @@ -4378,7 +4376,7 @@ def test_filter_against_workaround(self): f = lambda x: x.mean() > 10 old_way = s[grouped.transform(f).astype('bool')] new_way = grouped.filter(f) - assert_series_equal(new_way.order(), old_way.order()) + assert_series_equal(new_way.sort_values(), old_way.sort_values()) # Set up DataFrame of ints, floats, strings. from string import ascii_lowercase diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 9a3576a8fd846..c2e6cda500dab 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -232,6 +232,12 @@ def test_sort(self): for ind in self.indices.values(): self.assertRaises(TypeError, ind.sort) + def test_order(self): + for ind in self.indices.values(): + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + ind.order() + def test_mutability(self): for ind in self.indices.values(): if not len(ind): diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 2c0bfcd9b905d..a613115054385 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1698,7 +1698,7 @@ def loop(mi, df, keys): for frame in a, b: for i in range(5): # lexsort depth - df = frame.copy() if i == 0 else frame.sort(columns=cols[:i]) + df = frame.copy() if i == 0 else frame.sort_values(by=cols[:i]) mi = df.set_index(cols[:-1]) assert not mi.index.lexsort_depth < i loop(mi, df, keys) @@ -2958,7 +2958,7 @@ def test_non_unique_loc(self): self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,2)])) # monotonic are ok - df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort(axis=0) + df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort_index(axis=0) result = df.loc[1:] expected = DataFrame({'A' : 
[2,4,5,6], 'B' : [4, 6,7,8]}, index = [1,1,2,3]) assert_frame_equal(result,expected) @@ -3866,10 +3866,9 @@ def f(): self.assertRaises(com.SettingWithCopyError, f) df = DataFrame(np.random.randn(10,4)) - s = df.iloc[:,0] - s = s.order() - assert_series_equal(s,df.iloc[:,0].order()) - assert_series_equal(s,df[0].order()) + s = df.iloc[:,0].sort_values() + assert_series_equal(s,df.iloc[:,0].sort_values()) + assert_series_equal(s,df[0].sort_values()) # false positives GH6025 df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] }) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index be7ed6c1b268f..1bce047f3bf96 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -746,14 +746,11 @@ def test_getitem_partial_column_select(self): def test_sortlevel(self): df = self.frame.copy() df.index = np.arange(len(df)) - assertRaisesRegexp(TypeError, 'hierarchical index', df.sortlevel, 0) # axis=1 # series a_sorted = self.frame['A'].sortlevel(0) - with assertRaisesRegexp(TypeError, 'hierarchical index'): - self.frame.reset_index()['A'].sortlevel() # preserve names self.assertEqual(a_sorted.index.names, self.frame.index.names) @@ -935,7 +932,7 @@ def test_stack(self): # columns unsorted unstacked = self.ymd.unstack() - unstacked = unstacked.sort(axis=1, ascending=False) + unstacked = unstacked.sort_index(axis=1, ascending=False) restacked = unstacked.stack() assert_frame_equal(restacked, self.ymd) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 36a8600e51725..06b14d0f0b609 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -9,6 +9,7 @@ from itertools import product, starmap from distutils.version import LooseVersion import warnings +import random import nose @@ -2234,7 +2235,7 @@ def test_ix_setitem_corner(self): inds + ['foo'], 5) def test_get_set_boolean_different_order(self): - ordered = self.series.order() + ordered = self.series.sort_values() # 
setting copy = self.series.copy() @@ -4869,43 +4870,45 @@ def test_drop_duplicates(self): sc.drop_duplicates(keep=False, inplace=True) assert_series_equal(sc, s[~expected]) - def test_sort(self): + def test_sort_values(self): + ts = self.ts.copy() - ts.sort() - self.assert_numpy_array_equal(ts, self.ts.order()) - self.assert_numpy_array_equal(ts.index, self.ts.order().index) + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + ts.sort() + + self.assert_numpy_array_equal(ts, self.ts.sort_values()) + self.assert_numpy_array_equal(ts.index, self.ts.sort_values().index) - ts.sort(ascending=False) - self.assert_numpy_array_equal(ts, self.ts.order(ascending=False)) + ts.sort_values(ascending=False, inplace=True) + self.assert_numpy_array_equal(ts, self.ts.sort_values(ascending=False)) self.assert_numpy_array_equal(ts.index, - self.ts.order(ascending=False).index) + self.ts.sort_values(ascending=False).index) # GH 5856/5853 - # Series.sort operating on a view + # Series.sort_values operating on a view df = DataFrame(np.random.randn(10,4)) s = df.iloc[:,0] def f(): - s.sort() + s.sort_values(inplace=True) self.assertRaises(ValueError, f) # test order/sort inplace # GH6859 ts1 = self.ts.copy() - ts1.sort(ascending=False) + ts1.sort_values(ascending=False, inplace=True) ts2 = self.ts.copy() - ts2.order(ascending=False,inplace=True) + ts2.sort_values(ascending=False, inplace=True) assert_series_equal(ts1,ts2) ts1 = self.ts.copy() - ts1 = ts1.sort(ascending=False,inplace=False) + ts1 = ts1.sort_values(ascending=False, inplace=False) ts2 = self.ts.copy() - ts2 = ts.order(ascending=False) + ts2 = ts2.sort_values(ascending=False) assert_series_equal(ts1,ts2) def test_sort_index(self): - import random - rindex = list(self.ts.index) random.shuffle(rindex) @@ -4918,29 +4921,65 @@ def test_sort_index(self): assert_series_equal(sorted_series, self.ts.reindex(self.ts.index[::-1])) + def test_sort_API(self): + + # API for 9816 + + # sortlevel + mi = 
MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + res = s.sort_index(level='A') + assert_series_equal(backwards, res) + + # sort_index + rindex = list(self.ts.index) + random.shuffle(rindex) + + random_order = self.ts.reindex(rindex) + sorted_series = random_order.sort_index(level=0) + assert_series_equal(sorted_series, self.ts) + + # compat on axis + sorted_series = random_order.sort_index(axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, lambda : random_order.sort_values(axis=1)) + + sorted_series = random_order.sort_index(level=0, axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, lambda : random_order.sort_index(level=0, axis=1)) + def test_order(self): + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + self.ts.order() + ts = self.ts.copy() ts[:5] = np.NaN vals = ts.values - result = ts.order() + result = ts.sort_values() self.assertTrue(np.isnan(result[-5:]).all()) self.assert_numpy_array_equal(result[:-5], np.sort(vals[5:])) - result = ts.order(na_position='first') + result = ts.sort_values(na_position='first') self.assertTrue(np.isnan(result[:5]).all()) self.assert_numpy_array_equal(result[5:], np.sort(vals[5:])) # something object-type ser = Series(['A', 'B'], [1, 2]) # no failure - ser.order() + ser.sort_values() # ascending=False - ordered = ts.order(ascending=False) + ordered = ts.sort_values(ascending=False) expected = np.sort(ts.valid().values)[::-1] assert_almost_equal(expected, ordered.valid().values) - ordered = ts.order(ascending=False, na_position='first') + ordered = ts.sort_values(ascending=False, na_position='first') assert_almost_equal(expected, ordered.valid().values) def test_nsmallest_nlargest(self): @@ -4996,8 +5035,8 @@ def test_nsmallest_nlargest(self): assert_series_equal(s.nlargest(0), empty) assert_series_equal(s.nlargest(-1), empty) - 
assert_series_equal(s.nsmallest(len(s)), s.order()) - assert_series_equal(s.nsmallest(len(s) + 1), s.order()) + assert_series_equal(s.nsmallest(len(s)), s.sort_values()) + assert_series_equal(s.nsmallest(len(s) + 1), s.sort_values()) assert_series_equal(s.nlargest(len(s)), s.iloc[[4, 0, 1, 3, 2]]) assert_series_equal(s.nlargest(len(s) + 1), s.iloc[[4, 0, 1, 3, 2]]) @@ -7399,7 +7438,7 @@ def test_repeat(self): def test_unique_data_ownership(self): # it works! #1807 - Series(Series(["a", "c", "b"]).unique()).sort() + Series(Series(["a", "c", "b"]).unique()).sort_values() def test_datetime_timedelta_quantiles(self): # covers #9694 diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 727852ced25b0..0641de22d0d6a 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -151,7 +151,7 @@ def map(self, f): except Exception: return _algos.arrmap_object(self.asobject.values, f) - def order(self, return_indexer=False, ascending=True): + def sort_values(self, return_indexer=False, ascending=True): """ Return sorted copy of Index """