From 2688cbe2a20b41d199e9f8257384b9d719480eb4 Mon Sep 17 00:00:00 2001
From: Jeremy Schendel <jschendel@users.noreply.github.com>
Date: Wed, 14 Nov 2018 07:26:24 -0700
Subject: [PATCH] BUG: Fix Series/DataFrame.rank(pct=True) with more than 2**24
 rows (#23688)

---
 doc/source/whatsnew/v0.24.0.txt       | 1 +
 pandas/_libs/algos_rank_helper.pxi.in | 4 ++--
 pandas/tests/frame/test_rank.py       | 7 +++++++
 pandas/tests/series/test_rank.py      | 7 +++++++
 pandas/tests/test_algos.py            | 9 +++++++++
 5 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 19af38954e282..f6a27e4c68ce0 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1205,6 +1205,7 @@ Numeric
 - Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the narray as ``timedelta64[ns]`` dtype (:issue:`23114`)
 - Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`).
 - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`)
+- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`)
 
 Strings
 ^^^^^^^
diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in
index 4d144dcf2808a..329c368e13d6d 100644
--- a/pandas/_libs/algos_rank_helper.pxi.in
+++ b/pandas/_libs/algos_rank_helper.pxi.in
@@ -53,7 +53,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
         int tiebreak = 0
         bint keep_na = 0
         bint isnan
-        float count = 0.0
+        float64_t count = 0.0
     tiebreak = tiebreakers[ties_method]
 
     {{if dtype == 'float64'}}
@@ -228,7 +228,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average',
         float64_t sum_ranks = 0
         int tiebreak = 0
         bint keep_na = 0
-        float count = 0.0
+        float64_t count = 0.0
 
     tiebreak = tiebreakers[ties_method]
 
diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py
index 078c48539de16..eaba5f7ec7790 100644
--- a/pandas/tests/frame/test_rank.py
+++ b/pandas/tests/frame/test_rank.py
@@ -309,3 +309,10 @@ def test_rank_pct_true(self, method, exp):
 
         expected = DataFrame(exp)
         tm.assert_frame_equal(result, expected)
+
+    def test_pct_max_many_rows(self):
+        # GH 18271: with > 2**24 rows, rank(pct=True) used to exceed 1.0
+        df = DataFrame({'A': np.arange(2**24 + 1),
+                        'B': np.arange(2**24 + 1, 0, -1)})
+        result = df.rank(pct=True).max()
+        assert (result == 1).all()
diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py
index 9772ceecfc7b1..5b0ea37a0bfcf 100644
--- a/pandas/tests/series/test_rank.py
+++ b/pandas/tests/series/test_rank.py
@@ -495,3 +495,10 @@ def test_rank_first_pct(dtype, ser, exp):
         result = s.rank(method='first', pct=True)
         expected = Series(exp).astype(result.dtype)
         assert_series_equal(result, expected)
+
+
+def test_pct_max_many_rows():
+    # GH 18271: with > 2**24 rows, rank(pct=True) used to exceed 1.0
+    s = Series(np.arange(2**24 + 1))
+    result = s.rank(pct=True).max()
+    assert result == 1
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 3642c4ee98a9e..ff505f2986b1a 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1462,6 +1462,15 @@ def test_too_many_ndims(self):
         with pytest.raises(TypeError, match=msg):
             algos.rank(arr)
 
+    @pytest.mark.parametrize('values', [
+        np.arange(2**24 + 1),
+        np.arange(2**25 + 2).reshape(2**24 + 1, 2)],
+        ids=['1d', '2d'])
+    def test_pct_max_many_rows(self, values):
+        # GH 18271: max pct rank must be 1 for 1d/2d input with > 2**24 rows
+        result = algos.rank(values, pct=True).max()
+        assert result == 1
+
 
 def test_pad_backfill_object_segfault():