Skip to content

Commit

Permalink
BUG: aggregations were getting overwritten if they had the same name (#30858)
Browse files Browse the repository at this point in the history

* 🐛 aggregations were getting overwritten if they had the same name
  • Loading branch information
MarcoGorelli authored Jul 14, 2020
1 parent b018691 commit b6222ec
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 6 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1093,6 +1093,7 @@ Reshaping
- Bug in :func:`crosstab` where, when the inputs are two Series with tuple names, the output kept a dummy MultiIndex as columns (:issue:`18321`)
- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
- Bug in :meth:`SeriesGroupBy.aggregate` where aggregations were overwritten when they shared the same name (:issue:`30880`)
- Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`)
- :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing a DataFrame (:issue:`31413`)
- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)
Expand Down
15 changes: 9 additions & 6 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def aggregate(
if isinstance(ret, dict):
from pandas import concat

ret = concat(ret, axis=1)
ret = concat(ret.values(), axis=1, keys=[key.label for key in ret.keys()])
return ret

agg = aggregate
Expand Down Expand Up @@ -307,8 +307,8 @@ def _aggregate_multiple_funcs(self, arg):

arg = zip(columns, arg)

results = {}
for name, func in arg:
results: Dict[base.OutputKey, Union[Series, DataFrame]] = {}
for idx, (name, func) in enumerate(arg):
obj = self

# reset the cache so that we
Expand All @@ -317,13 +317,14 @@ def _aggregate_multiple_funcs(self, arg):
obj = copy.copy(obj)
obj._reset_cache()
obj._selection = name
results[name] = obj.aggregate(func)
results[base.OutputKey(label=name, position=idx)] = obj.aggregate(func)

if any(isinstance(x, DataFrame) for x in results.values()):
# let higher level handle
return results

return self.obj._constructor_expanddim(results, columns=columns)
output = self._wrap_aggregated_output(results)
return self.obj._constructor_expanddim(output, columns=columns)

def _wrap_series_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
Expand Down Expand Up @@ -354,10 +355,12 @@ def _wrap_series_output(
if len(output) > 1:
result = self.obj._constructor_expanddim(indexed_output, index=index)
result.columns = columns
else:
elif not columns.empty:
result = self.obj._constructor(
indexed_output[0], index=index, name=columns[0]
)
else:
result = self.obj._constructor_expanddim()

return result

Expand Down
58 changes: 58 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
test .agg behavior / note that .apply is tested generally in test_groupby.py
"""
import functools
from functools import partial

import numpy as np
import pytest

from pandas.errors import PerformanceWarning

from pandas.core.dtypes.common import is_integer_dtype

import pandas as pd
Expand Down Expand Up @@ -252,6 +255,61 @@ def test_agg_multiple_functions_maintain_order(df):
tm.assert_index_equal(result.columns, exp_cols)


def test_agg_multiple_functions_same_name():
    """Check that two aggregations sharing one name both survive ``agg``.

    Regression test for GH 30880. Both ``partial(np.quantile, ...)``
    callables end up labelled ``"quantile"``, so the result is expected to
    hold one ``("A", "quantile")`` column per callable, in the order given.
    """
    # GH 30880
    # "s" / "3min" are used instead of the "S" / "T" spellings: both forms
    # name the same offsets, but the upper-case ones are deprecated in newer
    # pandas releases.
    df = pd.DataFrame(
        np.random.randn(1000, 3),
        index=pd.date_range("1/1/2012", freq="s", periods=1000),
        columns=["A", "B", "C"],
    )
    quantiles = [0.9999, 0.1111]

    result = df.resample("3min").agg(
        {"A": [partial(np.quantile, q=q) for q in quantiles]}
    )

    # 1000 one-second samples fall into six 3-minute bins.
    expected_index = pd.date_range("1/1/2012", freq="3min", periods=6)
    expected_columns = pd.MultiIndex.from_tuples(
        [("A", "quantile"), ("A", "quantile")]
    )
    # Compute each quantile independently, then lay them out column-wise in
    # the same order as the callables passed to ``agg``.
    expected_values = np.array(
        [df.resample("3min").A.quantile(q=q).values for q in quantiles]
    ).T
    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)


def test_agg_multiple_functions_same_name_with_ohlc_present():
    """Check same-named aggregations survive when ``"ohlc"`` is also requested.

    Regression test for GH 30880. ``ohlc`` expands dimensions (it yields
    four columns), so this path needs a separate test from the one where
    every aggregation returns a single column.
    """
    # Local import: only needed to scope the warning filter below.
    import warnings

    # GH 30880
    # "s" / "3min" are used instead of the "S" / "T" spellings: both forms
    # name the same offsets, but the upper-case ones are deprecated in newer
    # pandas releases.
    df = pd.DataFrame(
        np.random.randn(1000, 3),
        index=pd.date_range("1/1/2012", freq="s", periods=1000),
        columns=["A", "B", "C"],
    )
    quantiles = [0.9999, 0.1111]

    result = df.resample("3min").agg(
        {"A": ["ohlc"] + [partial(np.quantile, q=q) for q in quantiles]}
    )

    # 1000 one-second samples fall into six 3-minute bins.
    expected_index = pd.date_range("1/1/2012", freq="3min", periods=6)
    expected_columns = pd.MultiIndex.from_tuples(
        [
            ("A", "ohlc", "open"),
            ("A", "ohlc", "high"),
            ("A", "ohlc", "low"),
            ("A", "ohlc", "close"),
            ("A", "quantile", "A"),
            ("A", "quantile", "A"),
        ]
    )
    non_ohlc_expected_values = np.array(
        [df.resample("3min").A.quantile(q=q).values for q in quantiles]
    ).T
    expected_values = np.hstack(
        [df.resample("3min").A.ohlc(), non_ohlc_expected_values]
    )
    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )

    # A PerformanceWarning may be raised by `assert col in right` inside
    # assert_frame_equal. That is a detail of the comparison helper rather
    # than of the behaviour under test, so it is tolerated instead of
    # required.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PerformanceWarning)
        tm.assert_frame_equal(result, expected)


def test_multiple_functions_tuples_and_non_tuples(df):
# #1359
funcs = [("foo", "mean"), "std"]
Expand Down

0 comments on commit b6222ec

Please sign in to comment.