diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3e8d19096a36e..c2322ae626cfd 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,6 +2,7 @@ concat routines """ +from typing import List import warnings import numpy as np @@ -437,13 +438,13 @@ def get_result(self): mgr = self.objs[0]._data.concat( [x._data for x in self.objs], self.new_axes ) - cons = _get_series_result_type(mgr, self.objs) + cons = self.objs[0]._constructor return cons(mgr, name=name).__finalize__(self, method="concat") # combine as columns in a frame else: data = dict(zip(range(len(self.objs)), self.objs)) - cons = _get_series_result_type(data) + cons = DataFrame index, columns = self.new_axes df = cons(data, index=index) @@ -473,7 +474,7 @@ def get_result(self): if not self.copy: new_data._consolidate_inplace() - cons = _get_frame_result_type(new_data, self.objs) + cons = self.objs[0]._constructor return cons._from_axes(new_data, self.new_axes).__finalize__( self, method="concat" ) @@ -520,13 +521,13 @@ def _get_new_axes(self): new_axes[self.axis] = self._get_concat_axis() return new_axes - def _get_comb_axis(self, i): + def _get_comb_axis(self, i: int) -> Index: data_axis = self.objs[0]._get_block_manager_axis(i) return get_objs_combined_axis( self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort ) - def _get_concat_axis(self): + def _get_concat_axis(self) -> Index: """ Return index to be used along concatenation axis. 
""" @@ -537,7 +538,7 @@ def _get_concat_axis(self): idx = ibase.default_index(len(self.objs)) return idx elif self.keys is None: - names = [None] * len(self.objs) + names: List = [None] * len(self.objs) num = 0 has_names = False for i, x in enumerate(self.objs): @@ -702,27 +703,3 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde return MultiIndex( levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False ) - - -def _get_series_result_type(result, objs=None): - """ - return appropriate class of Series concat - input is either dict or array-like - """ - # TODO: See if we can just inline with _constructor_expanddim - # now that sparse is removed. - - # concat Series with axis 1 - if isinstance(result, dict): - return DataFrame - - # otherwise it is a SingleBlockManager (axis = 0) - return objs[0]._constructor - - -def _get_frame_result_type(result, objs): - """ - return appropriate class of DataFrame-like concat - """ - # TODO: just inline this as _constructor. - return objs[0] diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 16c04454898db..4cba52c5cd651 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -1,4 +1,5 @@ import re +from typing import List import numpy as np @@ -10,7 +11,7 @@ from pandas.core.dtypes.missing import notna from pandas.core.arrays import Categorical -from pandas.core.frame import _shared_docs +from pandas.core.frame import DataFrame, _shared_docs from pandas.core.indexes.base import Index from pandas.core.reshape.concat import concat from pandas.core.tools.numeric import to_numeric @@ -21,13 +22,13 @@ % dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt") ) def melt( - frame, + frame: DataFrame, id_vars=None, value_vars=None, var_name=None, value_name="value", col_level=None, -): +) -> DataFrame: # TODO: what about the existing index? 
# If multiindex, gather names of columns on all level for checking presence # of `id_vars` and `value_vars` @@ -35,6 +36,7 @@ def melt( cols = [x for c in frame.columns for x in c] else: cols = list(frame.columns) + if id_vars is not None: if not is_list_like(id_vars): id_vars = [id_vars] @@ -119,7 +121,7 @@ def melt( return frame._constructor(mdata, columns=mcolumns) -def lreshape(data, groups, dropna=True, label=None): +def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame: """ Reshape long-format data to wide. Generalized inverse of DataFrame.pivot @@ -129,6 +131,8 @@ def lreshape(data, groups, dropna=True, label=None): groups : dict {new_name : list_of_columns} dropna : boolean, default True + label : object, default None + Dummy kwarg, not used. Examples -------- @@ -188,7 +192,7 @@ def lreshape(data, groups, dropna=True, label=None): return data._constructor(mdata, columns=id_cols + pivot_cols) -def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): +def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): r""" Wide panel to long format. Less flexible but more user-friendly than melt. 
@@ -412,14 +416,14 @@ def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"): two 2.9 """ - def get_var_names(df, stub, sep, suffix): + def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]: regex = r"^{stub}{sep}{suffix}$".format( stub=re.escape(stub), sep=re.escape(sep), suffix=suffix ) pattern = re.compile(regex) return [col for col in df.columns if pattern.match(col)] - def melt_stub(df, stub, i, j, value_vars, sep: str): + def melt_stub(df, stub: str, i, j, value_vars, sep: str): newdf = melt( df, id_vars=i, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 76c4b328eb4db..4d838db6c95f6 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -92,7 +92,7 @@ def merge( def _groupby_and_merge( - by, on, left, right, _merge_pieces, check_duplicates: bool = True + by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True ): """ groupby & merge; we are always performing a left-by type operation @@ -313,7 +313,7 @@ def merge_asof( suffixes=("_x", "_y"), tolerance=None, allow_exact_matches: bool = True, - direction="backward", + direction: str = "backward", ): """ Perform an asof merge. This is similar to a left-join except that we @@ -1299,11 +1299,13 @@ def _get_join_indexers( right_keys ), "left_key and right_keys must be the same length" - # bind `sort` arg. of _factorize_keys - fkeys = partial(_factorize_keys, sort=sort) - # get left & right join labels and num. of levels at each location - llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys))) + mapped = ( + _factorize_keys(left_keys[n], right_keys[n], sort=sort) + for n in range(len(left_keys)) + ) + zipped = zip(*mapped) + llab, rlab, shape = [list(x) for x in zipped] # get flat i8 keys from label lists lkey, rkey = _get_join_keys(llab, rlab, shape, sort) @@ -1311,7 +1313,7 @@ def _get_join_indexers( # factorize keys to a dense i8 space # `count` is the num. 
of unique keys # set(lkey) | set(rkey) == range(count) - lkey, rkey, count = fkeys(lkey, rkey) + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) # preserve left frame order if how == 'left' and sort == False kwargs = copy.copy(kwargs) @@ -1487,12 +1489,12 @@ def get_result(self): return result -def _asof_function(direction): +def _asof_function(direction: str): name = "asof_join_{dir}".format(dir=direction) return getattr(libjoin, name, None) -def _asof_by_function(direction): +def _asof_by_function(direction: str): name = "asof_join_{dir}_on_X_by_Y".format(dir=direction) return getattr(libjoin, name, None) @@ -1536,7 +1538,7 @@ def __init__( how: str = "asof", tolerance=None, allow_exact_matches: bool = True, - direction="backward", + direction: str = "backward", ): self.by = by @@ -1775,11 +1777,11 @@ def flip(xs): def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): - # bind `sort` argument - fkeys = partial(_factorize_keys, sort=sort) - # left & right join labels and num. 
of levels at each location - mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels))) + mapped = ( + _factorize_keys(index.levels[n], join_keys[n], sort=sort) + for n in range(index.nlevels) + ) zipped = zip(*mapped) rcodes, lcodes, shape = [list(x) for x in zipped] if sort: @@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) # factorize keys to a dense i8 space - lkey, rkey, count = fkeys(lkey, rkey) + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) return libjoin.left_outer_join(lkey, rkey, count, sort=sort) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0626420d9c114..b126b6e221ccc 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,3 +1,5 @@ +from typing import TYPE_CHECKING, Callable, Dict, Tuple, Union + import numpy as np from pandas.util._decorators import Appender, Substitution @@ -14,6 +16,9 @@ from pandas.core.reshape.util import cartesian_product from pandas.core.series import Series +if TYPE_CHECKING: + from pandas import DataFrame + # Note: We need to make sure `frame` is imported before `pivot`, otherwise # _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency @@ -180,14 +185,14 @@ def pivot_table( def _add_margins( - table, + table: Union["Series", "DataFrame"], data, values, rows, cols, aggfunc, observed=None, - margins_name="All", + margins_name: str = "All", fill_value=None, ): if not isinstance(margins_name, str): @@ -200,14 +205,16 @@ def _add_margins( grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) - # could be passed a Series object with no 'columns' - if hasattr(table, "columns"): + if table.ndim == 2: + # i.e. 
DataFrame for level in table.columns.names[1:]: if margins_name in table.columns.get_level_values(level): raise ValueError(msg) if len(rows) > 1: - key = (margins_name,) + ("",) * (len(rows) - 1) + key = (margins_name,) + ("",) * ( + len(rows) - 1 + ) # type: Union[str, Tuple[str, ...]] else: key = margins_name @@ -216,7 +223,7 @@ def _add_margins( # one column in the data. Compute grand margin and return it. return table.append(Series({key: grand_margin[margins_name]})) - if values: + elif values: marginal_result_set = _generate_marginal_results( table, data, @@ -232,12 +239,15 @@ def _add_margins( return marginal_result_set result, margin_keys, row_margin = marginal_result_set else: + # no values, and table is a DataFrame + assert isinstance(table, ABCDataFrame) marginal_result_set = _generate_marginal_results_without_values( table, data, rows, cols, aggfunc, observed, margins_name ) if not isinstance(marginal_result_set, tuple): return marginal_result_set result, margin_keys, row_margin = marginal_result_set + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) # populate grand margin for k in margin_keys: @@ -266,7 +276,7 @@ def _add_margins( return result -def _compute_grand_margin(data, values, aggfunc, margins_name="All"): +def _compute_grand_margin(data, values, aggfunc, margins_name: str = "All"): if values: grand_margin = {} @@ -289,7 +299,15 @@ def _compute_grand_margin(data, values, aggfunc, margins_name="All"): def _generate_marginal_results( - table, data, values, rows, cols, aggfunc, observed, grand_margin, margins_name="All" + table, + data, + values, + rows, + cols, + aggfunc, + observed, + grand_margin, + margins_name: str = "All", ): if len(cols) > 0: # need to "interleave" the margins @@ -353,7 +371,7 @@ def _all_key(key): def _generate_marginal_results_without_values( - table, data, rows, cols, aggfunc, observed, margins_name="All" + table: "DataFrame", data, rows, cols, aggfunc, observed, margins_name: str = "All" ): if 
len(cols) > 0: # need to "interleave" the margins @@ -406,7 +424,7 @@ def _convert_by(by): @Substitution("\ndata : DataFrame") @Appender(_shared_docs["pivot"], indents=1) -def pivot(data, index=None, columns=None, values=None): +def pivot(data: "DataFrame", index=None, columns=None, values=None): if values is None: cols = [columns] if index is None else [index, columns] append = index is None @@ -436,8 +454,8 @@ def crosstab( colnames=None, aggfunc=None, margins=False, - margins_name="All", - dropna=True, + margins_name: str = "All", + dropna: bool = True, normalize=False, ): """ @@ -546,7 +564,7 @@ def crosstab( if pass_objs: common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False) - data = {} + data = {} # type: dict data.update(zip(rownames, index)) data.update(zip(colnames, columns)) @@ -585,7 +603,7 @@ def crosstab( return table -def _normalize(table, normalize, margins, margins_name="All"): +def _normalize(table, normalize, margins: bool, margins_name="All"): if not isinstance(normalize, (bool, str)): axis_subs = {0: "index", 1: "columns"} @@ -601,7 +619,7 @@ def _normalize(table, normalize, margins, margins_name="All"): "all": lambda x: x / x.sum(axis=1).sum(axis=0), "columns": lambda x: x / x.sum(), "index": lambda x: x.div(x.sum(axis=1), axis=0), - } + } # type: Dict[Union[bool, str], Callable] normalizers[True] = normalizers["all"] @@ -668,7 +686,7 @@ def _normalize(table, normalize, margins, margins_name="All"): return table -def _get_names(arrs, names, prefix="row"): +def _get_names(arrs, names, prefix: str = "row"): if names is None: names = [] for i, arr in enumerate(arrs): diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 073bb4707f890..bfaa49dd576dc 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -496,7 +496,7 @@ def _convert_bin_to_datelike_type(bins, dtype): def _format_labels( - bins, precision, right: bool = True, include_lowest: bool = False, dtype=None + bins, 
precision: int, right: bool = True, include_lowest: bool = False, dtype=None ): """ based on the dtype, return our labels """ @@ -565,7 +565,7 @@ def _postprocess_for_cut(fac, bins, retbins: bool, dtype, original): return fac, bins -def _round_frac(x, precision): +def _round_frac(x, precision: int): """ Round the fractional part of the given number """ @@ -580,7 +580,7 @@ def _round_frac(x, precision): return np.around(x, digits) -def _infer_precision(base_precision, bins): +def _infer_precision(base_precision: int, bins) -> int: """Infer an appropriate precision for _round_frac """ for precision in range(base_precision, 20):