diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ba4d7dd063c38..32217a5d5f740 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -50,7 +50,7 @@ class providing the base-class of operations.
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import Appender, Substitution, cache_readonly, doc
 
-from pandas.core.dtypes.cast import maybe_cast_result
+from pandas.core.dtypes.cast import maybe_cast_result, maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
     ensure_float,
     is_bool_dtype,
@@ -1185,22 +1185,24 @@ def _python_agg_general(self, func, *args, **kwargs):
 
             assert result is not None
             key = base.OutputKey(label=name, position=idx)
-            output[key] = maybe_cast_result(result, obj, numeric_only=True)
 
-        if not output:
-            return self._python_apply_general(f, self._selected_obj)
+            if is_numeric_dtype(obj.dtype):
+                result = maybe_downcast_to_dtype(result, obj.dtype)
 
-        if self.grouper._filter_empty_groups:
-
-            mask = counts.ravel() > 0
-            for key, result in output.items():
+            if self.grouper._filter_empty_groups:
+                mask = counts.ravel() > 0
 
                 # since we are masking, make sure that we have a float object
                 values = result
                 if is_numeric_dtype(values.dtype):
                     values = ensure_float(values)
 
-                output[key] = maybe_cast_result(values[mask], result)
+                result = maybe_downcast_to_dtype(values[mask], result.dtype)
+
+            output[key] = result
+
+        if not output:
+            return self._python_apply_general(f, self._selected_obj)
 
         return self._wrap_aggregated_output(output, index=self.grouper.result_index)
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ded5f610b850e..fb60bd8e3b3c1 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -718,7 +718,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):
             result[label] = res
 
         result = lib.maybe_convert_objects(result, try_float=0)
-        # TODO: maybe_cast_to_extension_array?
+        result = maybe_cast_result(result, obj, numeric_only=True)
 
         return result, counts