🐛 Fix aggregations being overwritten when they share the same name

pandas-dev · Jan 9, 2020 · 65ae0c6 · 65ae0c6
1 parent 6437f5e
commit 65ae0c6
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 5 deletions.
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -1017,6 +1017,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
 - Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
 - Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
+- Bug in :meth:`SeriesGroupBy._aggregate_multiple_funcs` where aggregations were overwritten when they shared the same name (:issue:`30092`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -308,7 +308,8 @@ def _aggregate_multiple_funcs(self, arg):
 
             arg = zip(columns, arg)
 
-        results = {}
+        results = []
+        Result = namedtuple("result", ["name", "aggregation"])
         for name, func in arg:
             obj = self
 
@@ -318,13 +319,18 @@ def _aggregate_multiple_funcs(self, arg):
                 obj = copy.copy(obj)
                 obj._reset_cache()
                 obj._selection = name
-            results[name] = obj.aggregate(func)
+            results.append(Result(name, obj.aggregate(func)))
 
-        if any(isinstance(x, DataFrame) for x in results.values()):
+        if any(isinstance(x.aggregation, DataFrame) for x in results):
             # let higher level handle
-            return results
+            return dict(results)
 
-        return DataFrame(results, columns=columns)
+        # If there are multiple aggregations with the same name, we need to pass
+        # them to the constructor with different keys.
+        df = DataFrame({n: x.aggregation for n, x in enumerate(results)}).rename(
+            columns={n: column for n, column in enumerate(columns)}
+        )
+        return df
 
     def _wrap_series_output(
         self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -239,6 +239,60 @@ def test_agg_multiple_functions_maintain_order(df):
     tm.assert_index_equal(result.columns, exp_cols)
 
 
+def test_agg_multiple_functions_same_name(df):
+    np.random.seed(1)
+    df = tm.makeTimeDataFrame()
+    result = df.resample("3D").agg(
+        {
+            "A": [
+                functools.partial(np.quantile, q=0.9999),
+                functools.partial(np.quantile, q=0.90),
+            ]
+        }
+    )
+    expected_index = pd.DatetimeIndex(
+        [
+            "2000-01-03",
+            "2000-01-06",
+            "2000-01-09",
+            "2000-01-12",
+            "2000-01-15",
+            "2000-01-18",
+            "2000-01-21",
+            "2000-01-24",
+            "2000-01-27",
+            "2000-01-30",
+            "2000-02-02",
+            "2000-02-05",
+            "2000-02-08",
+            "2000-02-11",
+        ],
+        dtype="datetime64[ns]",
+        freq="3D",
+    )
+    expected_columns = pd.MultiIndex.from_tuples([("A", "quantile"), ("A", "quantile")])
+    expected_values = [
+        [1.62391486, 1.19384194],
+        [0.86521379, 0.67157],
+        [1.74440713, 1.34017672],
+        [0.31892541, 0.2053572],
+        [1.46210794, 1.46210794],
+        [-0.32242953, -0.33474463],
+        [1.13376944, 1.13376944],
+        [-0.17256929, -0.31351425],
+        [0.58276115, 0.52875507],
+        [1.14449918, 0.92018942],
+        [0.90159057, 0.90144377],
+        [-0.68372786, -0.68372786],
+        [-0.12291923, -0.1518898],
+        [0.53035547, 0.53035547],
+    ]
+    expected = pd.DataFrame(
+        expected_values, columns=expected_columns, index=expected_index
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 def test_multiple_functions_tuples_and_non_tuples(df):
     # #1359
     funcs = [("foo", "mean"), "std"]