From d0c13a3b768fb1b5c511d66a19761e2b43f3e4ed Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 8 Feb 2018 01:57:19 +0100 Subject: [PATCH] DOC: some clean-up of the apply docs (follow-up #18577) (#19573) --- doc/source/basics.rst | 16 ++++++++------- doc/source/whatsnew/v0.23.0.txt | 23 +++++++++++---------- pandas/core/frame.py | 36 ++++++++++++++++++--------------- pandas/core/sparse/frame.py | 18 ++++++++++------- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index fb9e5a6cc75cb..749d4be11ad45 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -774,9 +774,9 @@ We encourage you to view the source code of :meth:`~DataFrame.pipe`. Row or Column-wise Function Application ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Arbitrary functions can be applied along the axes of a DataFrame or Panel +Arbitrary functions can be applied along the axes of a DataFrame using the :meth:`~DataFrame.apply` method, which, like the descriptive -statistics methods, take an optional ``axis`` argument: +statistics methods, takes an optional ``axis`` argument: .. ipython:: python @@ -794,13 +794,15 @@ The :meth:`~DataFrame.apply` method will also dispatch on a string method name. df.apply('mean', axis=1) The return type of the function passed to :meth:`~DataFrame.apply` affects the -type of the ultimate output from DataFrame.apply +type of the final output from ``DataFrame.apply`` for the default behaviour: -* If the applied function returns a ``Series``, the ultimate output is a ``DataFrame``. +* If the applied function returns a ``Series``, the final output is a ``DataFrame``. The columns match the index of the ``Series`` returned by the applied function. -* If the applied function returns any other type, the ultimate output is a ``Series``. -* A ``result_type`` kwarg is accepted with the options: ``reduce``, ``broadcast``, and ``expand``. 
- These will determine how list-likes return results expand (or not) to a ``DataFrame``. +* If the applied function returns any other type, the final output is a ``Series``. + +This default behaviour can be overridden using the ``result_type`` argument, which +accepts three options: ``reduce``, ``broadcast``, and ``expand``. +These will determine how list-likes return values expand (or not) to a ``DataFrame``. :meth:`~DataFrame.apply` combined with some cleverness can be used to answer many questions about a data set. For example, suppose we wanted to extract the date where the diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a7300f7d1ceb0..7782e5f1ffa56 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -334,20 +334,20 @@ Convert to an xarray DataArray .. _whatsnew_0230.api_breaking.apply: -Apply Changes -~~~~~~~~~~~~~ +Changes to make output of ``DataFrame.apply`` consistent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned, this includes the case -where a list-like (e.g. ``tuple`` or ``list`` is returned), (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, -:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`) +where a list-like (e.g. ``tuple`` or ``list``) is returned (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). .. ipython:: python df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, columns=['A', 'B', 'C']) df -Previous Behavior. 
If the returned shape happened to match the original columns, this would return a ``DataFrame``. +Previous Behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. If the return shape did not match, a ``Series`` with lists was returned. .. code-block:: python @@ -373,7 +373,7 @@ If the return shape did not match, a ``Series`` with lists was returned. dtype: object -New Behavior. The behavior is consistent. These will *always* return a ``Series``. +New Behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. .. ipython:: python @@ -386,8 +386,9 @@ To have expanded columns, you can use ``result_type='expand'`` df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') -To have broadcast the result across, you can use ``result_type='broadcast'``. The shape -must match the original columns. +To broadcast the result across the original columns (the old behaviour for +list-likes of the correct length), you can use ``result_type='broadcast'``. +The shape must match the original columns. .. ipython:: python @@ -397,7 +398,7 @@ Returning a ``Series`` allows one to control the exact return structure and colu .. ipython:: python - df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) .. _whatsnew_0230.api_breaking.build_changes: @@ -523,8 +524,8 @@ Deprecations - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - :func:``DataFrame.from_items`` is deprecated. 
Use :func:``DataFrame.from_dict()`` instead, or :func:``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) -- The ``broadcast`` parameter of ``.apply()`` is removed in favor of ``result_type='broadcast'`` (:issue:`18577`) -- The ``reduce`` parameter of ``.apply()`` is removed in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) .. _whatsnew_0230.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9487f51919108..28923f0fbf240 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4822,12 +4822,12 @@ def aggregate(self, func, axis=0, *args, **kwargs): def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, result_type=None, args=(), **kwds): - """Applies function along input axis of DataFrame. + """Applies function along an axis of the DataFrame. Objects passed to functions are Series objects having index either the DataFrame's index (axis=0) or the columns (axis=1). - Return type depends on whether passed function aggregates, or the - reduce argument if the DataFrame is empty. + Final return type depends on the return type of the applied function, + or on the `result_type` argument. Parameters ---------- @@ -4863,15 +4863,18 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, by result_type='reduce'. result_type : {'expand', 'reduce', 'broadcast, None} - These only act when axis=1 {columns} + These only act when axis=1 {columns}: + * 'expand' : list-like results will be turned into columns. * 'reduce' : return a Series if possible rather than expanding list-like results. This is the opposite to 'expand'. 
* 'broadcast' : results will be broadcast to the original shape of the frame, the original index & columns will be retained. - * None : list-like results will be returned as a list - in a single column. However if the apply function - returns a Series these are expanded to columns. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. .. versionadded:: 0.23.0 @@ -4893,8 +4896,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, We use this DataFrame to illustrate - >>> df = DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, - ... columns=['A', 'B', 'C']) + >>> df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + ... columns=['A', 'B', 'C']) >>> df A B C 0 1 2 3 @@ -4904,7 +4907,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 4 1 2 3 5 1 2 3 - Using a ufunc + Using a numpy universal function (in this case the same as + ``np.sqrt(df)``): >>> df.apply(np.sqrt) A B C @@ -4954,8 +4958,8 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 4 1 2 5 1 2 - Return a Series inside the function is similar to passing - Passing result_type='expand'. The resulting column names + Returning a Series inside the function is similar to passing + ``result_type='expand'``. The resulting column names will be the Series index. >>> df.apply(lambda x: Series([1, 2], index=['foo', 'bar']), axis=1) @@ -4967,10 +4971,10 @@ def apply(self, func, axis=0, broadcast=None, raw=False, reduce=None, 4 1 2 5 1 2 - - Passing result_type='broadcast' will take a same shape - result, whether list-like or scalar and broadcast it - along the axis. The resulting column names will be the originals. 
+ Passing ``result_type='broadcast'`` will ensure the same shape + result, whether list-like or scalar is returned by the function, + and broadcast it along the axis. The resulting column names will + be the originals. >>> df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') A B C diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 371377ce2899c..19b126216db81 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -861,14 +861,18 @@ def apply(self, func, axis=0, broadcast=None, reduce=None, by result_type='reduce'. result_type : {'expand', 'reduce', 'broadcast, None} - These only act when axis=1 {columns} - * 'expand' : list-like results will be turned into columns + These only act when axis=1 {columns}: + + * 'expand' : list-like results will be turned into columns. * 'reduce' : return a Series if possible rather than expanding - list-like results. This is the opposite to 'expand' - * 'broadcast' : scalar results will be broadcast to all columns - * None : list-like results will be returned as a list - in a single column. However if the apply function - returns a Series these are expanded to columns. + list-like results. This is the opposite to 'expand'. + * 'broadcast' : results will be broadcast to the original shape + of the frame, the original index & columns will be retained. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. .. versionadded:: 0.23.0