Skip to content

Commit

Permalink
API: Expanded resample
Browse files Browse the repository at this point in the history
closes #13500

Author: Chris <cbartak@gmail.com>

Closes #13961 from chris-b1/resample-api and squashes the following commits:

b8dd114 [Chris] make _from_selection a property
10c7280 [Chris] NotImp -> ValueError
e203fcf [Chris] doc updates
384026b [Chris] remove PeriodIndex workaround
c7b299e [Chris] cleanup debugging
5fd97d9 [Chris] add from_selection bookkeeping
7f9add4 [Chris] more wip
b55309a [Chris] wip
c4db0e7 [Chris] move error handling; doc fixups
def74de [Chris] API: Expanded resample
  • Loading branch information
chris-b1 authored and jreback committed Aug 31, 2016
1 parent f92cd7e commit 8654a9e
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 54 deletions.
24 changes: 24 additions & 0 deletions doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1473,6 +1473,30 @@ Furthermore, you can also specify multiple aggregation functions for each column
r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] })
If a ``DataFrame`` does not have a datetimelike index, but instead you want
to resample based on a datetimelike column in the frame, it can be passed to
the ``on`` keyword.

.. ipython:: python
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
'a': np.arange(5)},
index=pd.MultiIndex.from_arrays([
[1,2,3,4,5],
pd.date_range('2015-01-01', freq='W', periods=5)],
names=['v','d']))
df
df.resample('M', on='date').sum()
Similarly, if you instead want to resample by a datetimelike
level of a ``MultiIndex``, its name or location can be passed to the
``level`` keyword.

.. ipython:: python
df.resample('M', level='d').sum()
.. _timeseries.periods:

Time Span Representation
Expand Down
14 changes: 14 additions & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,20 @@ Other enhancements

pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30)

- The ``.resample()`` method now accepts an ``on=`` or ``level=`` parameter for resampling on a datetimelike column or ``MultiIndex`` level (:issue:`13500`)

.. ipython:: python

df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
'a': np.arange(5)},
index=pd.MultiIndex.from_arrays([
[1,2,3,4,5],
pd.date_range('2015-01-01', freq='W', periods=5)],
names=['v','d']))
df
df.resample('M', on='date').sum()
df.resample('M', level='d').sum()

- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`)
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`)
- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`)
Expand Down
21 changes: 16 additions & 5 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4047,10 +4047,12 @@ def between_time(self, start_time, end_time, include_start=True,

def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
label=None, convention='start', kind=None, loffset=None,
limit=None, base=0):
limit=None, base=0, on=None, level=None):
"""
Convenience method for frequency conversion and resampling of regular
time-series data.
Convenience method for frequency conversion and resampling of time
series. Object must have a datetime-like index (DatetimeIndex,
PeriodIndex, or TimedeltaIndex), or pass datetime-like values
to the on or level keyword.
Parameters
----------
Expand All @@ -4068,7 +4070,17 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
For frequencies that evenly subdivide 1 day, the "origin" of the
aggregated intervals. For example, for '5min' frequency, base could
range from 0 through 4. Defaults to 0
on : string, optional
For a DataFrame, column to use instead of index for resampling.
Column must be datetime-like.
.. versionadded:: 0.19.0
level : string or int, optional
For a MultiIndex, level (name or number) to use for
resampling. Level must be datetime-like.
.. versionadded:: 0.19.0
To learn more about the offset strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
Expand Down Expand Up @@ -4173,12 +4185,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
"""
from pandas.tseries.resample import (resample,
_maybe_process_deprecations)

axis = self._get_axis_number(axis)
r = resample(self, freq=rule, label=label, closed=closed,
axis=axis, kind=kind, loffset=loffset,
convention=convention,
base=base)
base=base, key=on, level=level)
return _maybe_process_deprecations(r,
how=how,
fill_method=fill_method,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ def _set_grouper(self, obj, sort=False):
Parameters
----------
obj : the subject object
sort : bool, default False
whether the resulting grouper should be sorted
"""

if self.key is not None and self.level is not None:
Expand Down
33 changes: 32 additions & 1 deletion pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ def _typ(self):
return 'series'
return 'dataframe'

@property
def _from_selection(self):
    """
    Whether the resampling is from a DataFrame column or MultiIndex level.

    True when a grouper is set and it carries either a ``key`` or a
    ``level``; False otherwise.
    """
    # upsampling and PeriodIndex resampling do not work with a
    # selection; this state is used to catch those cases and raise an error
    return (self.groupby is not None and
            (self.groupby.key is not None or
             self.groupby.level is not None))

def _deprecated(self, op):
warnings.warn(("\n.resample() is now a deferred operation\n"
"You called {op}(...) on this deferred object "
Expand Down Expand Up @@ -207,6 +216,10 @@ def _convert_obj(self, obj):
Parameters
----------
obj : the object to be resampled
Returns
-------
obj : converted object
"""
obj = obj.consolidate()
return obj
Expand Down Expand Up @@ -706,6 +719,11 @@ def _upsample(self, method, limit=None):
self._set_binner()
if self.axis:
raise AssertionError('axis must be 0')
if self._from_selection:
raise ValueError("Upsampling from level= or on= selection"
" is not supported, use .set_index(...)"
" to explicitly set index to"
" datetime-like")

ax = self.ax
obj = self._selected_obj
Expand Down Expand Up @@ -763,7 +781,15 @@ def _convert_obj(self, obj):

# convert to timestamp
if not (self.kind is None or self.kind == 'period'):
obj = obj.to_timestamp(how=self.convention)
if self._from_selection:
# see GH 14008, GH 12871
msg = ("Resampling from level= or on= selection"
" with a PeriodIndex is not currently supported,"
" use .set_index(...) to explicitly set index")
raise NotImplementedError(msg)
else:
obj = obj.to_timestamp(how=self.convention)

return obj

def aggregate(self, arg, *args, **kwargs):
Expand Down Expand Up @@ -841,6 +867,11 @@ def _upsample(self, method, limit=None):
.fillna
"""
if self._from_selection:
raise ValueError("Upsampling from level= or on= selection"
" is not supported, use .set_index(...)"
" to explicitly set index to"
" datetime-like")
# we may need to actually resample as if we are timestamps
if self.kind == 'timestamp':
return super(PeriodIndexResampler, self)._upsample(method,
Expand Down
Loading

0 comments on commit 8654a9e

Please sign in to comment.