Skip to content

Commit

Permalink
🐛 aggregations were getting overwritten if they had the same name
Browse files Browse the repository at this point in the history
  • Loading branch information
Marco Gorelli committed Jan 9, 2020
1 parent 6437f5e commit 65ae0c6
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`)
- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`)
- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`)
- Bug in :meth:`SeriesGroupBy._aggregate_multiple_funcs` where aggregations were overwritten when they shared the same name (:issue:`30092`)

Reshaping
^^^^^^^^^
Expand Down
16 changes: 11 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,8 @@ def _aggregate_multiple_funcs(self, arg):

arg = zip(columns, arg)

results = {}
results = []
Result = namedtuple("result", ["name", "aggregation"])
for name, func in arg:
obj = self

Expand All @@ -318,13 +319,18 @@ def _aggregate_multiple_funcs(self, arg):
obj = copy.copy(obj)
obj._reset_cache()
obj._selection = name
results[name] = obj.aggregate(func)
results.append(Result(name, obj.aggregate(func)))

if any(isinstance(x, DataFrame) for x in results.values()):
if any(isinstance(x.aggregation, DataFrame) for x in results):
# let higher level handle
return results
return dict(results)

return DataFrame(results, columns=columns)
# If there are multiple aggregations with the same name, we need to pass
# them to the constructor with different keys.
df = DataFrame({n: x.aggregation for n, x in enumerate(results)}).rename(
columns={n: column for n, column in enumerate(columns)}
)
return df

def _wrap_series_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,60 @@ def test_agg_multiple_functions_maintain_order(df):
tm.assert_index_equal(result.columns, exp_cols)


def test_agg_multiple_functions_same_name(df):
    """Check that two aggregations with the same name both reach the result.

    Column ``"A"`` of a seeded time-series frame is resampled into 3-day
    bins and aggregated with two ``functools.partial`` wrappers around
    ``np.quantile`` (``q=0.9999`` and ``q=0.90``). The expected frame has two
    separate ``("A", "quantile")`` columns, one per aggregation.
    """
    # The ``df`` argument is replaced straight away by a frame built after
    # seeding NumPy's global RNG.
    np.random.seed(1)
    df = tm.makeTimeDataFrame()

    upper_quantile = functools.partial(np.quantile, q=0.9999)
    lower_quantile = functools.partial(np.quantile, q=0.90)
    result = df.resample("3D").agg({"A": [upper_quantile, lower_quantile]})

    bin_labels = [
        "2000-01-03",
        "2000-01-06",
        "2000-01-09",
        "2000-01-12",
        "2000-01-15",
        "2000-01-18",
        "2000-01-21",
        "2000-01-24",
        "2000-01-27",
        "2000-01-30",
        "2000-02-02",
        "2000-02-05",
        "2000-02-08",
        "2000-02-11",
    ]
    # One row per 3-day bin: [q=0.9999 value, q=0.90 value].
    quantile_rows = [
        [1.62391486, 1.19384194],
        [0.86521379, 0.67157],
        [1.74440713, 1.34017672],
        [0.31892541, 0.2053572],
        [1.46210794, 1.46210794],
        [-0.32242953, -0.33474463],
        [1.13376944, 1.13376944],
        [-0.17256929, -0.31351425],
        [0.58276115, 0.52875507],
        [1.14449918, 0.92018942],
        [0.90159057, 0.90144377],
        [-0.68372786, -0.68372786],
        [-0.12291923, -0.1518898],
        [0.53035547, 0.53035547],
    ]

    expected = pd.DataFrame(
        quantile_rows,
        columns=pd.MultiIndex.from_tuples([("A", "quantile"), ("A", "quantile")]),
        index=pd.DatetimeIndex(bin_labels, dtype="datetime64[ns]", freq="3D"),
    )
    tm.assert_frame_equal(result, expected)


def test_multiple_functions_tuples_and_non_tuples(df):
# #1359
funcs = [("foo", "mean"), "std"]
Expand Down

0 comments on commit 65ae0c6

Please sign in to comment.