Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR/BUG: fix Series.argmin/max #16964

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5329,7 +5329,7 @@ def idxmin(self, axis=0, skipna=True):

def idxmax(self, axis=0, skipna=True):
"""
Return index of first occurrence of maximum over requested axis.
Return label of first occurrence of maximum over requested axis.
NA/null values are excluded.

Parameters
Expand Down Expand Up @@ -5358,6 +5358,64 @@ def idxmax(self, axis=0, skipna=True):
result = [index[i] if i >= 0 else NA for i in indices]
return Series(result, index=self._get_agg_axis(axis))

def argmin(self, axis=0, skipna=True):
"""
Return index of first occurrence of minimum over requested axis.
NA/null values are excluded.

Parameters
----------
axis : {0 or 'index', 1 or 'columns'}, default 0
0 or 'index' for row-wise, 1 or 'columns' for column-wise
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA

Returns
-------
argmin : Series

Notes
-----
This method is the DataFrame version of ``ndarray.argmin``.

See Also
--------
Series.idxmin
"""
axis = self._get_axis_number(axis)
indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna)
return Series(indices, index=self._get_agg_axis(axis))

def argmax(self, axis=0, skipna=True):
"""
Return index of first occurrence of maximum over requested axis.
NA/null values are excluded.

Parameters
----------
axis : {0 or 'index', 1 or 'columns'}, default 0
0 or 'index' for row-wise, 1 or 'columns' for column-wise
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be first index.

Returns
-------
argmax : Series

Notes
-----
This method is the DataFrame version of ``ndarray.argmax``.

See Also
--------
Series.argmax
"""
axis = self._get_axis_number(axis)
indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna)
return Series(indices, index=self._get_agg_axis(axis))

def _get_agg_axis(self, axis_num):
""" let's be explict about this """
if axis_num == 0:
Expand Down
67 changes: 57 additions & 10 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@
import pandas.core.common as com
import pandas.core.nanops as nanops
import pandas.io.formats.format as fmt
from pandas.util._decorators import Appender, deprecate_kwarg, Substitution
from pandas.util._decorators import (
Appender, deprecate_kwarg, Substitution)
from pandas.util._validators import validate_bool_kwarg

from pandas._libs import index as libindex, tslib as libts, lib, iNaT
Expand Down Expand Up @@ -1238,7 +1239,7 @@ def duplicated(self, keep='first'):

def idxmin(self, axis=None, skipna=True, *args, **kwargs):
"""
Index of first occurrence of minimum of values.
Label of first occurrence of minimum of values.

Parameters
----------
Expand All @@ -1258,15 +1259,14 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs):
DataFrame.idxmin
numpy.ndarray.argmin
"""
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
i = nanops.nanargmin(_values_from_object(self), skipna=skipna)
i = self.argmin(axis, skipna, *args, **kwargs)
if i == -1:
return np.nan
return self.index[i]

def idxmax(self, axis=None, skipna=True, *args, **kwargs):
"""
Index of first occurrence of maximum of values.
Label of first occurrence of maximum of values.

Parameters
----------
Expand All @@ -1286,15 +1286,62 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
DataFrame.idxmax
numpy.ndarray.argmax
"""
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
i = nanops.nanargmax(_values_from_object(self), skipna=skipna)
i = self.argmax(axis, skipna, *args, **kwargs)
if i == -1:
return np.nan
return self.index[i]

# ndarray compat
argmin = idxmin
argmax = idxmax
def argmin(self, axis=None, skipna=True, *args, **kwargs):
"""
Index of first occurrence of minimum of values.

Parameters
----------
skipna : boolean, default True
Exclude NA/null values

Returns
-------
idxmin : Index of minimum of values

Notes
-----
This method is the Series version of ``ndarray.argmin``.

See Also
--------
DataFrame.argmin
numpy.ndarray.argmin
"""
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
i = nanops.nanargmin(_values_from_object(self), skipna=skipna)
return i

def argmax(self, axis=None, skipna=True, *args, **kwargs):
"""
Index of first occurrence of maximum of values.

Parameters
----------
skipna : boolean, default True
Exclude NA/null values

Returns
-------
idxmax : Index of maximum of values

Notes
-----
This method is the Series version of ``ndarray.argmax``.

See Also
--------
DataFrame.argmax
numpy.ndarray.argmax
"""
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
i = nanops.nanargmax(_values_from_object(self), skipna=skipna)
return i

def round(self, decimals=0, *args, **kwargs):
"""
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,34 @@ def test_idxmax(self):

pytest.raises(ValueError, frame.idxmax, axis=2)

def test_argmin(self):
frame = self.frame
frame.loc[5:10] = np.nan
frame.loc[15:20, -2:] = np.nan
for skipna in [True, False]:
for axis in [0, 1]:
for df in [frame, self.intframe]:
result = df.argmin(axis=axis, skipna=skipna)
expected = df.apply(Series.argmin, axis=axis,
skipna=skipna)
tm.assert_series_equal(result, expected)

pytest.raises(ValueError, frame.argmin, axis=2)

def test_argmax(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use parametrize on these

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These new functions were written to follow the existing structure of the idxmin and idxmax functions. Would you like me to parametrize those functions as well so as to not introduce an inconsistency in the testing methodology?

Copy link
Member

@gfyoung gfyoung Jul 16, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, he's referring to the fact that you were using nested for-loops for testing. You can avoid those by parametrizing as illustrated here.

frame = self.frame
frame.loc[5:10] = np.nan
frame.loc[15:20, -2:] = np.nan
for skipna in [True, False]:
for axis in [0, 1]:
for df in [frame, self.intframe]:
result = df.argmax(axis=axis, skipna=skipna)
expected = df.apply(Series.argmax, axis=axis,
skipna=skipna)
tm.assert_series_equal(result, expected)

pytest.raises(ValueError, frame.argmax, axis=2)

# ----------------------------------------------------------------------
# Logical reductions

Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,10 +1211,10 @@ def test_idxmin(self):
assert result == 1

def test_numpy_argmin(self):
# argmin is aliased to idxmin
data = np.random.randint(0, 11, size=10)
result = np.argmin(Series(data))
assert result == np.argmin(data)
assert result == Series(data).argmin()

if not _np_version_under1p10:
msg = "the 'out' parameter is not supported"
Expand Down Expand Up @@ -1266,11 +1266,10 @@ def test_idxmax(self):
assert result == 1.1

def test_numpy_argmax(self):

# argmax is aliased to idxmax
data = np.random.randint(0, 11, size=10)
result = np.argmax(Series(data))
assert result == np.argmax(data)
assert result == Series(data).argmax()

if not _np_version_under1p10:
msg = "the 'out' parameter is not supported"
Expand Down