Skip to content

Commit

Permalink
move pivot_table doc-string to DataFrame (#17174)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Aug 11, 2017
1 parent d11fae6 commit 06850a1
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 86 deletions.
86 changes: 86 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4154,6 +4154,92 @@ def pivot(self, index=None, columns=None, values=None):
from pandas.core.reshape.reshape import pivot
return pivot(self, index=index, columns=columns, values=values)

_shared_docs['pivot_table'] = """
Create a spreadsheet-style pivot table as a DataFrame. The levels in
the pivot table will be stored in MultiIndex objects (hierarchical
indexes) on the index and columns of the result DataFrame
Parameters
----------%s
values : column to aggregate, optional
index : column, Grouper, array, or list of the previous
If an array is passed, it must be the same length as the data. The
list can contain any of the other types (except list).
Keys to group by on the pivot table index. If an array is passed,
it is being used as the same manner as column values.
columns : column, Grouper, array, or list of the previous
If an array is passed, it must be the same length as the data. The
list can contain any of the other types (except list).
Keys to group by on the pivot table column. If an array is passed,
it is being used as the same manner as column values.
aggfunc : function or list of functions, default numpy.mean
If list of functions passed, the resulting pivot table will have
hierarchical columns whose top level are the function names
(inferred from the function objects themselves)
fill_value : scalar, default None
Value to replace missing values with
margins : boolean, default False
Add all row / columns (e.g. for subtotal / grand totals)
dropna : boolean, default True
Do not include columns whose entries are all NaN
margins_name : string, default 'All'
Name of the row / column that will contain the totals
when margins is True.
Examples
--------
>>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
... "bar", "bar", "bar", "bar"],
... "B": ["one", "one", "one", "two", "two",
... "one", "one", "two", "two"],
... "C": ["small", "large", "large", "small",
... "small", "large", "small", "small",
... "large"],
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
>>> df
A B C D
0 foo one small 1
1 foo one large 2
2 foo one large 2
3 foo two small 3
4 foo two small 3
5 bar one large 4
6 bar one small 5
7 bar two small 6
8 bar two large 7
>>> table = pivot_table(df, values='D', index=['A', 'B'],
... columns=['C'], aggfunc=np.sum)
>>> table
... # doctest: +NORMALIZE_WHITESPACE
C large small
A B
bar one 4.0 5.0
two 7.0 6.0
foo one 4.0 1.0
two NaN 6.0
Returns
-------
table : DataFrame
See also
--------
DataFrame.pivot : pivot without aggregation that can handle
non-numeric data
"""

@Substitution('')
@Appender(_shared_docs['pivot_table'])
def pivot_table(self, values=None, index=None, columns=None,
aggfunc='mean', fill_value=None, margins=False,
dropna=True, margins_name='All'):
from pandas.core.reshape.pivot import pivot_table
return pivot_table(self, values=values, index=index, columns=columns,
aggfunc=aggfunc, fill_value=fill_value,
margins=margins, dropna=dropna,
margins_name=margins_name)

def stack(self, level=-1, dropna=True):
"""
Pivot a level of the (possibly hierarchical) column labels, returning a
Expand Down
107 changes: 21 additions & 86 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,95 +2,30 @@


from pandas.core.dtypes.common import is_list_like, is_scalar
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

from pandas.core.reshape.concat import concat
from pandas import Series, DataFrame, MultiIndex, Index
from pandas.core.series import Series
from pandas.core.groupby import Grouper
from pandas.core.reshape.util import cartesian_product
from pandas.core.index import _get_combined_index
from pandas.core.index import Index, _get_combined_index
from pandas.compat import range, lrange, zip
from pandas import compat
import pandas.core.common as com
from pandas.util._decorators import Appender, Substitution

from pandas.core.frame import _shared_docs
# Note: We need to make sure `frame` is imported before `pivot`, otherwise
# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency

import numpy as np


@Substitution('\ndata : DataFrame')
@Appender(_shared_docs['pivot_table'], indents=1)
def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
fill_value=None, margins=False, dropna=True,
margins_name='All'):
"""
Create a spreadsheet-style pivot table as a DataFrame. The levels in the
pivot table will be stored in MultiIndex objects (hierarchical indexes) on
the index and columns of the result DataFrame
Parameters
----------
data : DataFrame
values : column to aggregate, optional
index : column, Grouper, array, or list of the previous
If an array is passed, it must be the same length as the data. The list
can contain any of the other types (except list).
Keys to group by on the pivot table index. If an array is passed, it
is being used as the same manner as column values.
columns : column, Grouper, array, or list of the previous
If an array is passed, it must be the same length as the data. The list
can contain any of the other types (except list).
Keys to group by on the pivot table column. If an array is passed, it
is being used as the same manner as column values.
aggfunc : function or list of functions, default numpy.mean
If list of functions passed, the resulting pivot table will have
hierarchical columns whose top level are the function names (inferred
from the function objects themselves)
fill_value : scalar, default None
Value to replace missing values with
margins : boolean, default False
Add all row / columns (e.g. for subtotal / grand totals)
dropna : boolean, default True
Do not include columns whose entries are all NaN
margins_name : string, default 'All'
Name of the row / column that will contain the totals
when margins is True.
Examples
--------
>>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
... "bar", "bar", "bar", "bar"],
... "B": ["one", "one", "one", "two", "two",
... "one", "one", "two", "two"],
... "C": ["small", "large", "large", "small",
... "small", "large", "small", "small",
... "large"],
... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
>>> df
A B C D
0 foo one small 1
1 foo one large 2
2 foo one large 2
3 foo two small 3
4 foo two small 3
5 bar one large 4
6 bar one small 5
7 bar two small 6
8 bar two large 7
>>> table = pivot_table(df, values='D', index=['A', 'B'],
... columns=['C'], aggfunc=np.sum)
>>> table
... # doctest: +NORMALIZE_WHITESPACE
C large small
A B
bar one 4.0 5.0
two 7.0 6.0
foo one 4.0 1.0
two NaN 6.0
Returns
-------
table : DataFrame
See also
--------
DataFrame.pivot : pivot without aggregation that can handle
non-numeric data
"""
index = _convert_by(index)
columns = _convert_by(columns)

Expand Down Expand Up @@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
table = agged.unstack(to_unstack)

if not dropna:
from pandas import MultiIndex
try:
m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
names=table.index.names)
Expand All @@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
except AttributeError:
pass # it's a single level or a series

if isinstance(table, DataFrame):
if isinstance(table, ABCDataFrame):
table = table.sort_index(axis=1)

if fill_value is not None:
Expand All @@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
if len(index) == 0 and len(columns) > 0:
table = table.T

# GH 15193 Makse sure empty columns are removed if dropna=True
if isinstance(table, DataFrame) and dropna:
# GH 15193 Make sure empty columns are removed if dropna=True
if isinstance(table, ABCDataFrame) and dropna:
table = table.dropna(how='all', axis=1)

return table


DataFrame.pivot_table = pivot_table


def _add_margins(table, data, values, rows, cols, aggfunc,
margins_name='All', fill_value=None):
if not isinstance(margins_name, compat.string_types):
Expand All @@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
else:
key = margins_name

if not values and isinstance(table, Series):
if not values and isinstance(table, ABCSeries):
# If there are no values and the table is a series, then there is only
# one column in the data. Compute grand margin and return it.
return table.append(Series({key: grand_margin[margins_name]}))
Expand All @@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
else:
row_margin[k] = grand_margin[k[0]]

from pandas import DataFrame
margin_dummy = DataFrame(row_margin, columns=[key]).T

row_names = result.index.names
Expand Down Expand Up @@ -402,7 +336,7 @@ def _convert_by(by):
if by is None:
by = []
elif (is_scalar(by) or
isinstance(by, (np.ndarray, Index, Series, Grouper)) or
isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or
hasattr(by, '__call__')):
by = [by]
else:
Expand Down Expand Up @@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
if values is not None and aggfunc is None:
raise ValueError("values cannot be used without an aggfunc.")

from pandas import DataFrame
df = DataFrame(data, index=common_idx)
if values is None:
df['__dummy__'] = 0
Expand Down Expand Up @@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'):
if names is None:
names = []
for i, arr in enumerate(arrs):
if isinstance(arr, Series) and arr.name is not None:
if isinstance(arr, ABCSeries) and arr.name is not None:
names.append(arr.name)
else:
names.append('%s_%d' % (prefix, i))
Expand Down

0 comments on commit 06850a1

Please sign in to comment.