From d7ff4e6c3254a5f197f90531b3f20f064eab1916 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 1 Jan 2020 09:18:20 -0800
Subject: [PATCH] PERF: perform reductions block-wise (#29847)

---
 pandas/core/frame.py                 | 20 ++++++++++++++++++++
 pandas/core/internals/managers.py    | 26 ++++++++++++++++++++++++++
 pandas/core/nanops.py                |  2 +-
 pandas/tests/groupby/test_groupby.py |  2 +-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0951f635b0e093..732b28d6a97fe1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7746,6 +7746,26 @@ def _get_data(axis_matters):
                 raise NotImplementedError(msg)
             return data
 
+        if numeric_only is not None and axis in [0, 1]:
+            df = self
+            if numeric_only is True:
+                df = _get_data(axis_matters=True)
+            if axis == 1:
+                df = df.T
+                axis = 0
+
+            out_dtype = "bool" if filter_type == "bool" else None
+
+            # After possibly _get_data and transposing, we are now in the
+            #  simple case where we can use BlockManager.reduce
+            res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
+            assert isinstance(res, dict)
+            if len(res):
+                assert len(res) == max(list(res.keys())) + 1, res.keys()
+            out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
+            out.index = df.columns
+            return out
+
         if numeric_only is None:
             values = self.values
             try:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 32cd65b4dc7d68..995e6f0aaad8e7 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -340,6 +340,32 @@ def _verify_integrity(self):
                 f"tot_items: {tot_items}"
             )
 
+    def reduce(self, func, *args, **kwargs):
+        # Apply func to each block's values. If 2D, we assume we operate column-wise
+        if self.ndim == 1:
+            # 1D: only blocks[0] is reduced and func's result (a scalar) is returned
+            blk = self.blocks[0]
+            return func(blk.values, *args, **kwargs)
+
+        res = {}  # mgr_loc -> func result for that block row
+        for blk in self.blocks:
+            bres = func(blk.values, *args, **kwargs)
+
+            if np.ndim(bres) == 0:
+                # EA (presumably ExtensionArray): scalar result, so exactly one row
+                assert blk.shape[0] == 1
+                new_res = zip(blk.mgr_locs.as_array, [bres])
+            else:
+                assert bres.ndim == 1, bres.shape
+                assert blk.shape[0] == len(bres), (blk.shape, bres.shape, args, kwargs)
+                new_res = zip(blk.mgr_locs.as_array, bres)
+
+            nr = dict(new_res)
+            assert not any(key in res for key in nr)  # no location seen twice
+            res.update(nr)
+
+        return res
+
     def apply(self, f, filter=None, **kwargs):
         """
         Iterate over the blocks, collect and create a new BlockManager.
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 1079f516a4e40e..584972f2b2dd5f 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -831,7 +831,7 @@ def reduction(values, axis=None, skipna=True, mask=None):
             try:
                 result = getattr(values, meth)(axis, dtype=dtype_max)
                 result.fill(np.nan)
-            except (AttributeError, TypeError, ValueError, np.core._internal.AxisError):
+            except (AttributeError, TypeError, ValueError):
                 result = np.nan
         else:
             result = getattr(values, meth)(axis)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 16c98f019b99d4..930d0a998e08ca 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -771,7 +771,7 @@ def test_omit_nuisance(df):
 
     # won't work with axis = 1
     grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1)
-    msg = r"unsupported operand type\(s\) for \+: 'Timestamp'"
+    msg = "reduction operation 'sum' not allowed for this dtype"
     with pytest.raises(TypeError, match=msg):
         grouped.agg(lambda x: x.sum(0, numeric_only=False))