Skip to content

Commit

Permalink
BUG: Avoid sentinel-infinity comparison problems (#13445)
Browse files Browse the repository at this point in the history
The problem causing #13445 ultimately traces to the fact that our
Infinity/NegInfinity objects were greater than/less than themselves,
which violates an assumption numpy makes when sorting.  This was
separately reported as numpy/numpy#7934, but
we can fix and test downstream as well.

closes #13445

Author: Douglas McNeil <dmcneil@mackenzieinvestments.com>

Closes #14006 from dsm054/fix_rank_segfault and squashes the following commits:

7d79370 [Douglas McNeil] BUG: Avoid sentinel-infinity comparison problems (#13445)
  • Loading branch information
dsm054 authored and jreback committed Aug 16, 2016
1 parent 4d6a40a commit 5c27c02
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 16 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,7 @@ Bug Fixes
- Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`)

- Bug in printing ``pd.DataFrame`` where unusual elements with the ``object`` dtype were causing segfaults (:issue:`13717`)
- Bug in ranking ``Series`` which could result in segfaults (:issue:`13445`)
- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`)
Expand Down
31 changes: 15 additions & 16 deletions pandas/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -567,28 +567,27 @@ cdef inline are_diff(object left, object right):
except TypeError:
return left != right

_return_false = lambda self, other: False
_return_true = lambda self, other: True

class Infinity(object):
""" provide a positive Infinity comparision method for ranking """

__lt__ = _return_false
__le__ = _return_false
__eq__ = _return_false
__ne__ = _return_true
__gt__ = _return_true
__ge__ = _return_true
__cmp__ = _return_false
__lt__ = lambda self, other: False
__le__ = lambda self, other: self is other
__eq__ = lambda self, other: self is other
__ne__ = lambda self, other: self is not other
__gt__ = lambda self, other: self is not other
__ge__ = lambda self, other: True

class NegInfinity(object):
""" provide a negative Infinity comparision method for ranking """

__lt__ = lambda self, other: self is not other
__le__ = lambda self, other: True
__eq__ = lambda self, other: self is other
__ne__ = lambda self, other: self is not other
__gt__ = lambda self, other: False
__ge__ = lambda self, other: self is other

__lt__ = _return_true
__le__ = _return_true
__eq__ = _return_false
__ne__ = _return_true
__gt__ = _return_false
__ge__ = _return_false
__cmp__ = _return_true

def rank_2d_generic(object in_arr, axis=0, ties_method='average',
ascending=True, na_option='keep', pct=False):
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from numpy.random import RandomState
from numpy import nan
from datetime import datetime
from itertools import permutations
from pandas import Series, Categorical, CategoricalIndex, Index
import pandas as pd

Expand Down Expand Up @@ -1270,6 +1271,35 @@ def test_groupsort_indexer():
assert (np.array_equal(result, expected))


def test_infinity_sort():
# GH 13445
# numpy's argsort can be unhappy if something is less than
# itself. Instead, let's give our infinities a self-consistent
# ordering, but outside the float extended real line.

Inf = _algos.Infinity()
NegInf = _algos.NegInfinity()

ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf]

assert all(Inf >= x for x in ref_nums)
assert all(Inf > x or x is Inf for x in ref_nums)
assert Inf >= Inf and Inf == Inf
assert not Inf < Inf and not Inf > Inf

assert all(NegInf <= x for x in ref_nums)
assert all(NegInf < x or x is NegInf for x in ref_nums)
assert NegInf <= NegInf and NegInf == NegInf
assert not NegInf < NegInf and not NegInf > NegInf

for perm in permutations(ref_nums):
assert sorted(perm) == ref_nums

# smoke tests
np.array([_algos.Infinity()] * 32).argsort()
np.array([_algos.NegInfinity()] * 32).argsort()


def test_ensure_platform_int():
arr = np.arange(100)

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,13 @@ def test_rank_int(self):
expected.index = result.index
assert_series_equal(result, expected)

def test_rank_object_bug(self):
# GH 13445

# smoke tests
Series([np.nan] * 32).astype(object).rank(ascending=True)
Series([np.nan] * 32).astype(object).rank(ascending=False)


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down

0 comments on commit 5c27c02

Please sign in to comment.