API: Expanded resample

closes #13500 Author: Chris <cbartak@gmail.com> Closes #13961 from chris-b1/resample-api and squashes the following commits: b8dd114 [Chris] make _from_selection a property 10c7280 [Chris] NotImp -> ValueError e203fcf [Chris] doc updates 384026b [Chris] remove PeriodIndex workaround c7b299e [Chris] cleanup debugging 5fd97d9 [Chris] add from_selection bookkeeping 7f9add4 [Chris] more wip b55309a [Chris] wip c4db0e7 [Chris] move error handling; doc fixups def74de [Chris] API: Expanded resample
pandas-dev · Aug 31, 2016 · 8654a9e · 8654a9e
1 parent f92cd7e
commit 8654a9e
Show file tree

Hide file tree

Showing 6 changed files with 221 additions and 54 deletions.
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
@@ -1473,6 +1473,30 @@ Furthermore, you can also specify multiple aggregation functions for each column
    r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] })
 
 
+If a ``DataFrame`` does not have a datetimelike index, but instead you want
+to resample based on a datetimelike column in the frame, it can be passed to
+the ``on`` keyword.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
+                      'a': np.arange(5)},
+                     index=pd.MultiIndex.from_arrays([
+                              [1,2,3,4,5],
+                              pd.date_range('2015-01-01', freq='W', periods=5)],
+                          names=['v','d']))
+   df
+   df.resample('M', on='date').sum()
+
+Similarly, if you instead want to resample by a datetimelike
+level of a ``MultiIndex``, its name or location can be passed to the
+``level`` keyword.
+
+.. ipython:: python
+
+   df.resample('M', level='d').sum()
+
+
 .. _timeseries.periods:
 
 Time Span Representation

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -397,6 +397,20 @@ Other enhancements
 
     pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30)
 
+- The ``.resample()`` function now accepts an ``on=`` or ``level=`` parameter for resampling on a datetimelike column or ``MultiIndex`` level (:issue:`13500`)
+
+  .. ipython:: python
+
+     df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
+                        'a': np.arange(5)},
+                       index=pd.MultiIndex.from_arrays([
+                                [1,2,3,4,5],
+                                pd.date_range('2015-01-01', freq='W', periods=5)],
+                            names=['v','d']))
+     df
+     df.resample('M', on='date').sum()
+     df.resample('M', level='d').sum()
+
 - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`)
 - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`)
 - The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4047,10 +4047,12 @@ def between_time(self, start_time, end_time, include_start=True,
 
     def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
                  label=None, convention='start', kind=None, loffset=None,
-                 limit=None, base=0):
+                 limit=None, base=0, on=None, level=None):
         """
-        Convenience method for frequency conversion and resampling of regular
-        time-series data.
+        Convenience method for frequency conversion and resampling of time
+        series.  Object must have a datetime-like index (DatetimeIndex,
+        PeriodIndex, or TimedeltaIndex), or pass datetime-like values
+        to the on or level keyword.
 
         Parameters
         ----------
@@ -4068,7 +4070,17 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
             For frequencies that evenly subdivide 1 day, the "origin" of the
             aggregated intervals. For example, for '5min' frequency, base could
             range from 0 through 4. Defaults to 0
+        on : string, optional
+            For a DataFrame, column to use instead of index for resampling.
+            Column must be datetime-like.
 
+            .. versionadded:: 0.19.0
+
+        level : string or int, optional
+            For a MultiIndex, level (name or number) to use for
+            resampling.  Level must be datetime-like.
+
+            .. versionadded:: 0.19.0
 
         To learn more about the offset strings, please see `this link
         <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
@@ -4173,12 +4185,11 @@ def resample(self, rule, how=None, axis=0, fill_method=None, closed=None,
         """
         from pandas.tseries.resample import (resample,
                                              _maybe_process_deprecations)
-
         axis = self._get_axis_number(axis)
         r = resample(self, freq=rule, label=label, closed=closed,
                      axis=axis, kind=kind, loffset=loffset,
                      convention=convention,
-                     base=base)
+                     base=base, key=on, level=level)
         return _maybe_process_deprecations(r,
                                            how=how,
                                            fill_method=fill_method,

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -255,7 +255,8 @@ def _set_grouper(self, obj, sort=False):
         Parameters
         ----------
         obj : the subject object
-
+        sort : bool, default False
+            whether the resulting grouper should be sorted
         """
 
         if self.key is not None and self.level is not None:

diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
@@ -112,6 +112,15 @@ def _typ(self):
             return 'series'
         return 'dataframe'
 
+    @property
+    def _from_selection(self):
+        """ is the resampling from a DataFrame column or MultiIndex level """
+        # upsampling and PeriodIndex resampling do not work with
+        # selection; this state is used to catch that and raise an error
+        return (self.groupby is not None and
+                (self.groupby.key is not None or
+                 self.groupby.level is not None))
+
     def _deprecated(self, op):
         warnings.warn(("\n.resample() is now a deferred operation\n"
                        "You called {op}(...) on this deferred object "
@@ -207,6 +216,10 @@ def _convert_obj(self, obj):
         Parameters
         ----------
         obj : the object to be resampled
+
+        Returns
+        -------
+        obj : converted object
         """
         obj = obj.consolidate()
         return obj
@@ -706,6 +719,11 @@ def _upsample(self, method, limit=None):
         self._set_binner()
         if self.axis:
             raise AssertionError('axis must be 0')
+        if self._from_selection:
+            raise ValueError("Upsampling from level= or on= selection"
+                             " is not supported, use .set_index(...)"
+                             " to explicitly set index to"
+                             " datetime-like")
 
         ax = self.ax
         obj = self._selected_obj
@@ -763,7 +781,15 @@ def _convert_obj(self, obj):
 
         # convert to timestamp
         if not (self.kind is None or self.kind == 'period'):
-            obj = obj.to_timestamp(how=self.convention)
+            if self._from_selection:
+                # see GH 14008, GH 12871
+                msg = ("Resampling from level= or on= selection"
+                       " with a PeriodIndex is not currently supported,"
+                       " use .set_index(...) to explicitly set index")
+                raise NotImplementedError(msg)
+            else:
+                obj = obj.to_timestamp(how=self.convention)
+
         return obj
 
     def aggregate(self, arg, *args, **kwargs):
@@ -841,6 +867,11 @@ def _upsample(self, method, limit=None):
         .fillna
 
         """
+        if self._from_selection:
+            raise ValueError("Upsampling from level= or on= selection"
+                             " is not supported, use .set_index(...)"
+                             " to explicitly set index to"
+                             " datetime-like")
         # we may need to actually resample as if we are timestamps
         if self.kind == 'timestamp':
             return super(PeriodIndexResampler, self)._upsample(method,