diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
index d1e94f62967f4..5a8b109c21858 100644
--- a/asv_bench/benchmarks/arithmetic.py
+++ b/asv_bench/benchmarks/arithmetic.py
@@ -50,6 +50,36 @@ def time_frame_op_with_scalar(self, dtype, scalar, op):
         op(self.df, scalar)
 
 
+class MixedFrameWithSeriesAxis0:
+    """
+    Benchmark DataFrame flex methods with a Series operand and ``axis=0``.
+
+    The frame mixes dtypes: many integer columns plus one float column.
+    """
+
+    param_names = ["opname"]
+    params = [
+        # comparison methods, then arithmetic methods
+        ["eq", "ne", "lt", "le", "ge", "gt"]
+        + ["add", "sub", "div", "floordiv", "mul", "pow"]
+    ]
+
+    def setup(self, opname):
+        # 100 rows of integer columns from the reshaped arange ...
+        frame = DataFrame(np.arange(10 ** 6).reshape(100, -1))
+        # ... and one float column, so the frame has more than one dtype
+        frame["C"] = 1.0
+
+        self.df = frame
+        # the Series operand is the frame's first column
+        self.ser = frame[0]
+
+    def time_frame_op_with_series_axis0(self, opname):
+        # look up the flex method by name and apply it along the index
+        flex_method = getattr(self.df, opname)
+        flex_method(self.ser, axis=0)
+
+
 class Ops:
 
     params = [[True, False], ["default", 1]]
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 44deab25db695..912da955c14f3 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -183,7 +183,7 @@ Performance improvements
 
 - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`)
 - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`)
--
+- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
index abe1484e3763d..c0971b91a2fa1 100644
--- a/pandas/_libs/ops.pyx
+++ b/pandas/_libs/ops.pyx
@@ -100,7 +100,7 @@ def scalar_compare(object[:] values, object val, object op):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def vec_compare(object[:] left, object[:] right, object op):
+def vec_compare(ndarray[object] left, ndarray[object] right, object op):
     """
     Compare the elements of `left` with the elements of `right` pointwise,
     with the comparison operation described by `op`.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 61641bfb24293..76e2caeff0cca 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5212,20 +5212,6 @@ def _arith_op(left, right):
 
         return new_data
 
-    def _combine_match_index(self, other: Series, func):
-        # at this point we have `self.index.equals(other.index)`
-
-        if ops.should_series_dispatch(self, other, func):
-            # operate column-wise; avoid costly object-casting in `.values`
-            new_data = ops.dispatch_to_series(self, other, func)
-        else:
-            # fastpath --> operate directly on values
-            other_vals = other.values.reshape(-1, 1)
-            with np.errstate(all="ignore"):
-                new_data = func(self.values, other_vals)
-            new_data = dispatch_fill_zeros(func, self.values, other_vals, new_data)
-        return new_data
-
     def _construct_result(self, result) -> "DataFrame":
         """
         Wrap the result of an arithmetic, comparison, or logical operation.
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index d0adf2da04db3..ed779c5da6d14 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -585,7 +585,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
 # DataFrame
 
 
-def _combine_series_frame(left, right, func, axis: int):
+def _combine_series_frame(left, right, func, axis: int, str_rep: str):
     """
     Apply binary operator `func` to self, other using alignment and fill
     conventions determined by the axis argument.
@@ -596,6 +596,7 @@ def _combine_series_frame(left, right, func, axis: int):
     right : Series
     func : binary operator
     axis : {0, 1}
+    str_rep : str
 
     Returns
     -------
@@ -603,7 +604,17 @@ def _combine_series_frame(left, right, func, axis: int):
     """
     # We assume that self.align(other, ...) has already been called
     if axis == 0:
-        new_data = left._combine_match_index(right, func)
+        values = right._values
+        if isinstance(values, np.ndarray):
+            # We can operate block-wise
+            values = values.reshape(-1, 1)
+
+            array_op = get_array_op(func, str_rep=str_rep)
+            bm = left._data.apply(array_op, right=values.T)
+            return type(left)(bm)
+
+        new_data = dispatch_to_series(left, right, func)
+
     else:
         new_data = dispatch_to_series(left, right, func, axis="columns")
 
@@ -791,7 +802,9 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
                 raise NotImplementedError(f"fill_value {fill_value} not supported.")
 
             axis = self._get_axis_number(axis) if axis is not None else 1
-            return _combine_series_frame(self, other, pass_op, axis=axis)
+            return _combine_series_frame(
+                self, other, pass_op, axis=axis, str_rep=str_rep
+            )
         else:
             # in this case we always have `np.ndim(other) == 0`
             if fill_value is not None:
@@ -826,7 +839,7 @@ def f(self, other, axis=default_axis, level=None):
 
         elif isinstance(other, ABCSeries):
             axis = self._get_axis_number(axis) if axis is not None else 1
-            return _combine_series_frame(self, other, op, axis=axis)
+            return _combine_series_frame(self, other, op, axis=axis, str_rep=str_rep)
         else:
             # in this case we always have `np.ndim(other) == 0`
             new_data = dispatch_to_series(self, other, op, str_rep)
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index 2c9105c52cf9b..e285c53d9813e 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -28,7 +28,6 @@
     ABCDatetimeArray,
     ABCExtensionArray,
     ABCIndex,
-    ABCIndexClass,
     ABCSeries,
     ABCTimedeltaArray,
 )
@@ -53,13 +52,15 @@ def comp_method_OBJECT_ARRAY(op, x, y):
         if isinstance(y, (ABCSeries, ABCIndex)):
             y = y.values
 
-        result = libops.vec_compare(x.ravel(), y, op)
+        if x.shape != y.shape:
+            raise ValueError("Shapes must match", x.shape, y.shape)
+        result = libops.vec_compare(x.ravel(), y.ravel(), op)
     else:
         result = libops.scalar_compare(x.ravel(), y, op)
     return result.reshape(x.shape)
 
 
-def masked_arith_op(x, y, op):
+def masked_arith_op(x: np.ndarray, y, op):
     """
     If the given arithmetic operation fails, attempt it again on
     only the non-null elements of the input array(s).
@@ -78,10 +79,22 @@ def masked_arith_op(x, y, op):
         dtype = find_common_type([x.dtype, y.dtype])
         result = np.empty(x.size, dtype=dtype)
 
+        if len(x) != len(y):
+            if not _can_broadcast(x, y):
+                raise ValueError(x.shape, y.shape)
+
+            # Call notna on pre-broadcasted y for performance
+            ymask = notna(y)
+            y = np.broadcast_to(y, x.shape)
+            ymask = np.broadcast_to(ymask, x.shape)
+
+        else:
+            ymask = notna(y)
+
         # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
         #  we would get int64 dtype, see GH#19956
         yrav = y.ravel()
-        mask = notna(xrav) & notna(yrav)
+        mask = notna(xrav) & ymask.ravel()
 
         if yrav.shape != mask.shape:
             # FIXME: GH#5284, GH#5035, GH#19448
@@ -211,6 +224,51 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str):
     return res_values
 
 
+def _broadcast_comparison_op(lvalues, rvalues, op) -> np.ndarray:
+    """
+    Broadcast a comparison operation between two 2D arrays.
+
+    Parameters
+    ----------
+    lvalues : np.ndarray or ExtensionArray
+    rvalues : np.ndarray or ExtensionArray, shape (1, lvalues.shape[1])
+    op : comparison operator
+
+    Returns
+    -------
+    np.ndarray[bool]
+    """
+    if isinstance(rvalues, np.ndarray):
+        rvalues = np.broadcast_to(rvalues, lvalues.shape)
+        return comparison_op(lvalues, rvalues, op)
+    result = np.empty(lvalues.shape, dtype=bool)
+    for i in range(len(lvalues)):
+        result[i, :] = comparison_op(lvalues[i], rvalues[0, :], op)  # single row
+    return result
+
+
+def _can_broadcast(lvalues, rvalues) -> bool:
+    """
+    Determine whether rvalues can be unambiguously broadcast against lvalues.
+
+    Parameters
+    ----------
+    lvalues : np.ndarray or ExtensionArray
+    rvalues : np.ndarray or ExtensionArray
+
+    Returns
+    -------
+    bool
+        True only when both are 2D, rvalues has exactly one row, and the
+        trailing dimensions have the same length.
+    """
+    # We assume the caller has already found that the lengths don't match
+    if lvalues.ndim != 2 or rvalues.ndim != 2:
+        return False
+    single_row = rvalues.shape[0] == 1
+    return single_row and lvalues.shape[1] == rvalues.shape[-1]
+
+
 def comparison_op(
     left: ArrayLike, right: Any, op, str_rep: Optional[str] = None,
 ) -> ArrayLike:
@@ -237,12 +295,16 @@ def comparison_op(
         # TODO: same for tuples?
         rvalues = np.asarray(rvalues)
 
-    if isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass)):
+    if isinstance(rvalues, (np.ndarray, ABCExtensionArray)):
         # TODO: make this treatment consistent across ops and classes.
         #  We are not catching all listlikes here (e.g. frozenset, tuple)
         #  The ambiguous case is object-dtype.  See GH#27803
         if len(lvalues) != len(rvalues):
-            raise ValueError("Lengths must match to compare")
+            if _can_broadcast(lvalues, rvalues):
+                return _broadcast_comparison_op(lvalues, rvalues, op)
+            raise ValueError(
+                "Lengths must match to compare", lvalues.shape, rvalues.shape
+            )
 
     if should_extension_dispatch(lvalues, rvalues):
         res_values = dispatch_to_extension_op(op, lvalues, rvalues)
diff --git a/pandas/tests/arithmetic/test_array_ops.py b/pandas/tests/arithmetic/test_array_ops.py
index d8aaa3183a1c6..53cb10ba9fc5e 100644
--- a/pandas/tests/arithmetic/test_array_ops.py
+++ b/pandas/tests/arithmetic/test_array_ops.py
@@ -4,7 +4,7 @@
 import pytest
 
 import pandas._testing as tm
-from pandas.core.ops.array_ops import na_logical_op
+from pandas.core.ops.array_ops import comparison_op, na_logical_op
 
 
 def test_na_logical_op_2d():
@@ -19,3 +19,18 @@ def test_na_logical_op_2d():
     result = na_logical_op(left, right, operator.or_)
     expected = right
     tm.assert_numpy_array_equal(result, expected)
+
+
+def test_object_comparison_2d():
+    arr = np.arange(9).reshape(3, 3).astype(object)
+    arr_t = arr.T
+    diagonal = np.eye(3).astype(bool)
+
+    is_eq = comparison_op(arr, arr_t, operator.eq)
+    tm.assert_numpy_array_equal(is_eq, diagonal)
+
+    # Read-only operands (e.g. from np.broadcast_to) must not raise in cython
+    arr_t.flags.writeable = False
+    is_ne = comparison_op(arr, arr_t, operator.ne)
+    tm.assert_numpy_array_equal(is_ne, ~diagonal)
+    assert arr.flags.writeable  # only the transposed view was made read-only
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index e4be8a979a70f..92d86c8b602ff 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -348,6 +348,25 @@ def test_floordiv_axis0(self):
         result2 = df.floordiv(ser.values, axis=0)
         tm.assert_frame_equal(result2, expected)
 
+    @pytest.mark.slow
+    @pytest.mark.parametrize("opname", ["floordiv", "pow"])
+    def test_floordiv_axis0_numexpr_path(self, opname):
+        # These ops go through numexpr here and have to fall back to
+        #  masked_arith_op
+        frame = pd.DataFrame(np.arange(10 ** 6).reshape(100, -1))
+        frame["C"] = 1.0
+        first_col = frame[0]
+
+        # column-by-column reference result from the plain operator
+        binop = getattr(operator, opname)
+        expected = pd.DataFrame({c: binop(frame[c], first_col) for c in frame})
+
+        flex_method = getattr(frame, opname)
+        tm.assert_frame_equal(flex_method(first_col, axis=0), expected)
+
+        # a bare ndarray operand must give the same result as the Series
+        tm.assert_frame_equal(flex_method(first_col.values, axis=0), expected)
+
     def test_df_add_td64_columnwise(self):
         # GH 22534 Check that column-wise addition broadcasts correctly
         dti = pd.date_range("2016-01-01", periods=10)