CLN: reshape (#29627)
jbrockmendel authored and jreback committed Nov 20, 2019
1 parent bbc7173 commit 002a89c
Showing 5 changed files with 72 additions and 71 deletions.
37 changes: 7 additions & 30 deletions pandas/core/reshape/concat.py
@@ -2,6 +2,7 @@
concat routines
"""

from typing import List
import warnings

import numpy as np
@@ -437,13 +438,13 @@ def get_result(self):
mgr = self.objs[0]._data.concat(
[x._data for x in self.objs], self.new_axes
)
cons = _get_series_result_type(mgr, self.objs)
cons = self.objs[0]._constructor
return cons(mgr, name=name).__finalize__(self, method="concat")

# combine as columns in a frame
else:
data = dict(zip(range(len(self.objs)), self.objs))
cons = _get_series_result_type(data)
cons = DataFrame

index, columns = self.new_axes
df = cons(data, index=index)
@@ -473,7 +474,7 @@ def get_result(self):
if not self.copy:
new_data._consolidate_inplace()

cons = _get_frame_result_type(new_data, self.objs)
cons = self.objs[0]._constructor
return cons._from_axes(new_data, self.new_axes).__finalize__(
self, method="concat"
)
@@ -520,13 +521,13 @@ def _get_new_axes(self):
new_axes[self.axis] = self._get_concat_axis()
return new_axes

def _get_comb_axis(self, i):
def _get_comb_axis(self, i: int) -> Index:
data_axis = self.objs[0]._get_block_manager_axis(i)
return get_objs_combined_axis(
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
)

def _get_concat_axis(self):
def _get_concat_axis(self) -> Index:
"""
Return index to be used along concatenation axis.
"""
@@ -537,7 +538,7 @@ def _get_concat_axis(self):
idx = ibase.default_index(len(self.objs))
return idx
elif self.keys is None:
names = [None] * len(self.objs)
names: List = [None] * len(self.objs)
num = 0
has_names = False
for i, x in enumerate(self.objs):
@@ -702,27 +703,3 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
return MultiIndex(
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)


def _get_series_result_type(result, objs=None):
"""
return appropriate class of Series concat
input is either dict or array-like
"""
# TODO: See if we can just inline with _constructor_expanddim
# now that sparse is removed.

# concat Series with axis 1
if isinstance(result, dict):
return DataFrame

# otherwise it is a SingleBlockManager (axis = 0)
return objs[0]._constructor


def _get_frame_result_type(result, objs):
"""
return appropriate class of DataFrame-like concat
"""
# TODO: just inline this as _constructor.
return objs[0]
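
Side note on the two deleted helpers: with the sparse subclasses gone, both reduce to asking the first object for its constructor, which is what the replacement `cons = self.objs[0]._constructor` does. A minimal sketch of that equivalence using only a plain Series and DataFrame (`_constructor` is a pandas-internal attribute, so this is an illustration rather than a guarantee):

import pandas as pd

s = pd.Series([1, 2, 3], name="a")
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# _constructor points back at the object's own class, so
# `cons = self.objs[0]._constructor` picks Series for Series inputs and
# DataFrame for DataFrame inputs (or the relevant subclass).
assert s._constructor is pd.Series
assert df._constructor is pd.DataFrame

# Concatenating Series along axis=1 still yields a plain DataFrame, which is
# why the columns branch can hard-code `cons = DataFrame`.
wide = pd.concat([s, s.rename("b")], axis=1)
assert type(wide) is pd.DataFrame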
18 changes: 11 additions & 7 deletions pandas/core/reshape/melt.py
@@ -1,4 +1,5 @@
import re
from typing import List

import numpy as np

@@ -10,7 +11,7 @@
from pandas.core.dtypes.missing import notna

from pandas.core.arrays import Categorical
from pandas.core.frame import _shared_docs
from pandas.core.frame import DataFrame, _shared_docs
from pandas.core.indexes.base import Index
from pandas.core.reshape.concat import concat
from pandas.core.tools.numeric import to_numeric
@@ -21,20 +22,21 @@
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt")
)
def melt(
frame,
frame: DataFrame,
id_vars=None,
value_vars=None,
var_name=None,
value_name="value",
col_level=None,
):
) -> DataFrame:
# TODO: what about the existing index?
# If multiindex, gather names of columns on all level for checking presence
# of `id_vars` and `value_vars`
if isinstance(frame.columns, ABCMultiIndex):
cols = [x for c in frame.columns for x in c]
else:
cols = list(frame.columns)

if id_vars is not None:
if not is_list_like(id_vars):
id_vars = [id_vars]
@@ -119,7 +121,7 @@ def melt(
return frame._constructor(mdata, columns=mcolumns)


def lreshape(data, groups, dropna=True, label=None):
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
"""
Reshape long-format data to wide. Generalized inverse of DataFrame.pivot
@@ -129,6 +131,8 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
groups : dict
{new_name : list_of_columns}
dropna : boolean, default True
label : object, default None
Dummy kwarg, not used.
Examples
--------
@@ -188,7 +192,7 @@ def lreshape(data: DataFrame, groups, dropna=True, label=None):
return data._constructor(mdata, columns=id_cols + pivot_cols)


def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
r"""
Wide panel to long format. Less flexible but more user-friendly than melt.
@@ -412,14 +416,14 @@ def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
two 2.9
"""

def get_var_names(df, stub, sep, suffix):
def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]:
regex = r"^{stub}{sep}{suffix}$".format(
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
)
pattern = re.compile(regex)
return [col for col in df.columns if pattern.match(col)]

def melt_stub(df, stub, i, j, value_vars, sep: str):
def melt_stub(df, stub: str, i, j, value_vars, sep: str):
newdf = melt(
df,
id_vars=i,
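
A closing note on this file: the stub-matching rule that the newly annotated `get_var_names` builds is the single regex shown in the diff above. A small standalone sketch of it (the sample column names are invented for illustration):

import re

def get_var_names(columns, stub: str, sep: str, suffix: str):
    # Same construction as the helper inside wide_to_long: stub and sep are
    # escaped literally, suffix stays a raw regex (default r"\d+").
    regex = r"^{stub}{sep}{suffix}$".format(
        stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
    )
    pattern = re.compile(regex)
    return [col for col in columns if pattern.match(col)]

print(get_var_names(["A_1", "A_2", "A_x", "B_1", "famid"], "A", "_", r"\d+"))
# -> ['A_1', 'A_2']  ("A_x" fails the \d+ suffix, "B_1" fails the stub)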
32 changes: 17 additions & 15 deletions pandas/core/reshape/merge.py
@@ -92,7 +92,7 @@ def merge(


def _groupby_and_merge(
by, on, left, right, _merge_pieces, check_duplicates: bool = True
by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True
):
"""
groupby & merge; we are always performing a left-by type operation
@@ -313,7 +313,7 @@ def merge_asof(
suffixes=("_x", "_y"),
tolerance=None,
allow_exact_matches: bool = True,
direction="backward",
direction: str = "backward",
):
"""
Perform an asof merge. This is similar to a left-join except that we
@@ -1299,19 +1299,21 @@ def _get_join_indexers(
right_keys
), "left_key and right_keys must be the same length"

# bind `sort` arg. of _factorize_keys
fkeys = partial(_factorize_keys, sort=sort)

# get left & right join labels and num. of levels at each location
llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys)))
mapped = (
_factorize_keys(left_keys[n], right_keys[n], sort=sort)
for n in range(len(left_keys))
)
zipped = zip(*mapped)
llab, rlab, shape = [list(x) for x in zipped]

# get flat i8 keys from label lists
lkey, rkey = _get_join_keys(llab, rlab, shape, sort)

# factorize keys to a dense i8 space
# `count` is the num. of unique keys
# set(lkey) | set(rkey) == range(count)
lkey, rkey, count = fkeys(lkey, rkey)
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

# preserve left frame order if how == 'left' and sort == False
kwargs = copy.copy(kwargs)
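
Aside on the `_get_join_indexers` hunk above: binding `sort` with `functools.partial` and mapping the partial is replaced by calling the factorize helper directly with `sort=` at each call site. A rough sketch of why the two spellings are equivalent, using a stand-in for the private `_factorize_keys` (the stand-in only approximates the real helper's behaviour):

from functools import partial

def factorize_keys(lk, rk, sort: bool = True):
    # Stand-in for pandas' private _factorize_keys: map both key arrays onto
    # dense integer codes and report how many unique keys there are.
    uniques = sorted(set(lk) | set(rk)) if sort else list(dict.fromkeys(list(lk) + list(rk)))
    mapping = {v: i for i, v in enumerate(uniques)}
    return [mapping[v] for v in lk], [mapping[v] for v in rk], len(uniques)

left_keys = [["a", "b", "a"], [1, 2, 2]]
right_keys = [["b", "a", "a"], [2, 2, 1]]

# Before: bind `sort` once and map the partial over the key arrays.
fkeys = partial(factorize_keys, sort=True)
old_style = [fkeys(lk, rk) for lk, rk in zip(left_keys, right_keys)]

# After: call the function directly, passing sort= each time -- same result,
# one less indirection to read past.
new_style = [factorize_keys(left_keys[n], right_keys[n], sort=True) for n in range(len(left_keys))]

assert old_style == new_style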
@@ -1487,12 +1489,12 @@ def get_result(self):
return result


def _asof_function(direction):
def _asof_function(direction: str):
name = "asof_join_{dir}".format(dir=direction)
return getattr(libjoin, name, None)


def _asof_by_function(direction):
def _asof_by_function(direction: str):
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction)
return getattr(libjoin, name, None)

@@ -1536,7 +1538,7 @@ def __init__(
how: str = "asof",
tolerance=None,
allow_exact_matches: bool = True,
direction="backward",
direction: str = "backward",
):

self.by = by
@@ -1775,11 +1777,11 @@ def flip(xs):

def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):

# bind `sort` argument
fkeys = partial(_factorize_keys, sort=sort)

# left & right join labels and num. of levels at each location
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels)))
mapped = (
_factorize_keys(index.levels[n], join_keys[n], sort=sort)
for n in range(index.nlevels)
)
zipped = zip(*mapped)
rcodes, lcodes, shape = [list(x) for x in zipped]
if sort:
@@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):
lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort)

# factorize keys to a dense i8 space
lkey, rkey, count = fkeys(lkey, rkey)
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

return libjoin.left_outer_join(lkey, rkey, count, sort=sort)
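
The same partial-inlining is applied in `_get_multiindex_indexer`; the other small change is looping over `index.nlevels` instead of `len(index.levels)`, which are the same number for a MultiIndex. A quick check with public API only:

import pandas as pd

mi = pd.MultiIndex.from_product([["x", "y"], [1, 2, 3]], names=["letter", "number"])

# nlevels and len(levels) agree, so the loop bound is unchanged by the edit.
assert mi.nlevels == len(mi.levels) == 2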
