From 06850a148ad880eb2fd2564cc0ad7cae8606dd90 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 11 Aug 2017 03:36:13 -0700 Subject: [PATCH] move pivot_table doc-string to DataFrame (#17174) --- pandas/core/frame.py | 86 ++++++++++++++++++++++++++++ pandas/core/reshape/pivot.py | 107 +++++++---------------------------- 2 files changed, 107 insertions(+), 86 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94cce1b4d05b5..2c82fe4c348d5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,6 +4154,92 @@ def pivot(self, index=None, columns=None, values=None): from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) + _shared_docs['pivot_table'] = """ + Create a spreadsheet-style pivot table as a DataFrame. The levels in + the pivot table will be stored in MultiIndex objects (hierarchical + indexes) on the index and columns of the result DataFrame. + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is used in the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is used in the same manner as column values. 
+ aggfunc : function or list of functions, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + fill_value : scalar, default None + Value to replace missing values with + margins : boolean, default False + Add all row / columns (e.g. for subtotal / grand totals) + dropna : boolean, default True + Do not include columns whose entries are all NaN + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 + + >>> table = pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + ... 
# doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + Returns + ------- + table : DataFrame + + See also + -------- + DataFrame.pivot : pivot without aggregation that can handle + non-numeric data + """ + + @Substitution('') + @Appender(_shared_docs['pivot_table']) + def pivot_table(self, values=None, index=None, columns=None, + aggfunc='mean', fill_value=None, margins=False, + dropna=True, margins_name='All'): + from pandas.core.reshape.pivot import pivot_table + return pivot_table(self, values=values, index=index, columns=columns, + aggfunc=aggfunc, fill_value=fill_value, + margins=margins, dropna=dropna, + margins_name=margins_name) + def stack(self, level=-1, dropna=True): """ Pivot a level of the (possibly hierarchical) column labels, returning a diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d4ea49c130add..e61adf3aac30a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -2,95 +2,30 @@ from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + from pandas.core.reshape.concat import concat -from pandas import Series, DataFrame, MultiIndex, Index +from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import _get_combined_index +from pandas.core.index import Index, _get_combined_index from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com +from pandas.util._decorators import Appender, Substitution + +from pandas.core.frame import _shared_docs +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. 
TODO: Fix this dependency + import numpy as np +@Substitution('\ndata : DataFrame') +@Appender(_shared_docs['pivot_table'], indents=1) def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - """ - Create a spreadsheet-style pivot table as a DataFrame. The levels in the - pivot table will be stored in MultiIndex objects (hierarchical indexes) on - the index and columns of the result DataFrame - - Parameters - ---------- - data : DataFrame - values : column to aggregate, optional - index : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table index. If an array is passed, it - is being used as the same manner as column values. - columns : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table column. If an array is passed, it - is being used as the same manner as column values. - aggfunc : function or list of functions, default numpy.mean - If list of functions passed, the resulting pivot table will have - hierarchical columns whose top level are the function names (inferred - from the function objects themselves) - fill_value : scalar, default None - Value to replace missing values with - margins : boolean, default False - Add all row / columns (e.g. for subtotal / grand totals) - dropna : boolean, default True - Do not include columns whose entries are all NaN - margins_name : string, default 'All' - Name of the row / column that will contain the totals - when margins is True. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... 
"one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 - - >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) - >>> table - ... # doctest: +NORMALIZE_WHITESPACE - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - Returns - ------- - table : DataFrame - - See also - -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data - """ index = _convert_by(index) columns = _convert_by(columns) @@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = agged.unstack(to_unstack) if not dropna: + from pandas import MultiIndex try: m = MultiIndex.from_arrays(cartesian_product(table.index.levels), names=table.index.names) @@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', except AttributeError: pass # it's a single level or a series - if isinstance(table, DataFrame): + if isinstance(table, ABCDataFrame): table = table.sort_index(axis=1) if fill_value is not None: @@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T - # GH 15193 Makse sure empty columns are removed if dropna=True - if isinstance(table, DataFrame) and dropna: + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, ABCDataFrame) and dropna: table = table.dropna(how='all', axis=1) return table -DataFrame.pivot_table = pivot_table - - def _add_margins(table, data, values, rows, cols, aggfunc, margins_name='All', fill_value=None): if not isinstance(margins_name, 
compat.string_types): @@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: key = margins_name - if not values and isinstance(table, Series): + if not values and isinstance(table, ABCSeries): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. return table.append(Series({key: grand_margin[margins_name]})) @@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: row_margin[k] = grand_margin[k[0]] + from pandas import DataFrame margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names @@ -402,7 +336,7 @@ def _convert_by(by): if by is None: by = [] elif (is_scalar(by) or - isinstance(by, (np.ndarray, Index, Series, Grouper)) or + isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or hasattr(by, '__call__')): by = [by] else: @@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, if values is not None and aggfunc is None: raise ValueError("values cannot be used without an aggfunc.") + from pandas import DataFrame df = DataFrame(data, index=common_idx) if values is None: df['__dummy__'] = 0 @@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'): if names is None: names = [] for i, arr in enumerate(arrs): - if isinstance(arr, Series) and arr.name is not None: + if isinstance(arr, ABCSeries) and arr.name is not None: names.append(arr.name) else: names.append('%s_%d' % (prefix, i))