CLN: reshape (#29627)
jbrockmendel authored and jreback committed Nov 20, 2019
1 parent bbc7173 commit 002a89c
Showing 5 changed files with 72 additions and 71 deletions.
37 changes: 7 additions & 30 deletions pandas/core/reshape/concat.py
@@ -2,6 +2,7 @@
concat routines
"""

from typing import List
import warnings

import numpy as np
@@ -437,13 +438,13 @@ def get_result(self):
mgr = self.objs[0]._data.concat(
[x._data for x in self.objs], self.new_axes
)
cons = _get_series_result_type(mgr, self.objs)
cons = self.objs[0]._constructor
return cons(mgr, name=name).__finalize__(self, method="concat")

# combine as columns in a frame
else:
data = dict(zip(range(len(self.objs)), self.objs))
cons = _get_series_result_type(data)
cons = DataFrame

index, columns = self.new_axes
df = cons(data, index=index)
@@ -473,7 +474,7 @@ def get_result(self):
if not self.copy:
new_data._consolidate_inplace()

cons = _get_frame_result_type(new_data, self.objs)
cons = self.objs[0]._constructor
return cons._from_axes(new_data, self.new_axes).__finalize__(
self, method="concat"
)
@@ -520,13 +521,13 @@ def _get_new_axes(self):
new_axes[self.axis] = self._get_concat_axis()
return new_axes

def _get_comb_axis(self, i):
def _get_comb_axis(self, i: int) -> Index:
data_axis = self.objs[0]._get_block_manager_axis(i)
return get_objs_combined_axis(
self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort
)

def _get_concat_axis(self):
def _get_concat_axis(self) -> Index:
"""
Return index to be used along concatenation axis.
"""
@@ -537,7 +538,7 @@ def _get_concat_axis(self):
idx = ibase.default_index(len(self.objs))
return idx
elif self.keys is None:
names = [None] * len(self.objs)
names: List = [None] * len(self.objs)
num = 0
has_names = False
for i, x in enumerate(self.objs):
@@ -702,27 +703,3 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
return MultiIndex(
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
)


def _get_series_result_type(result, objs=None):
"""
return appropriate class of Series concat
input is either dict or array-like
"""
# TODO: See if we can just inline with _constructor_expanddim
# now that sparse is removed.

# concat Series with axis 1
if isinstance(result, dict):
return DataFrame

# otherwise it is a SingleBlockManager (axis = 0)
return objs[0]._constructor


def _get_frame_result_type(result, objs):
"""
return appropriate class of DataFrame-like concat
"""
# TODO: just inline this as _constructor.
return objs[0]
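
Side note on the two deleted helpers: with the sparse subclasses gone, both reduce to asking the first object for its constructor, which is what the replacement `cons = self.objs[0]._constructor` does. A minimal sketch of that equivalence using only a plain Series and DataFrame (`_constructor` is a pandas-internal attribute, so this is an illustration rather than a guarantee):

import pandas as pd

s = pd.Series([1, 2, 3], name="a")
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# _constructor points back at the object's own class, so
# `cons = self.objs[0]._constructor` picks Series for Series inputs and
# DataFrame for DataFrame inputs (or the relevant subclass).
assert s._constructor is pd.Series
assert df._constructor is pd.DataFrame

# Concatenating Series along axis=1 still yields a plain DataFrame, which is
# why the columns branch can hard-code `cons = DataFrame`.
wide = pd.concat([s, s.rename("b")], axis=1)
assert type(wide) is pd.DataFrame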
18 changes: 11 additions & 7 deletions pandas/core/reshape/melt.py
@@ -1,4 +1,5 @@
import re
from typing import List

import numpy as np

@@ -10,7 +11,7 @@
from pandas.core.dtypes.missing import notna

from pandas.core.arrays import Categorical
from pandas.core.frame import _shared_docs
from pandas.core.frame import DataFrame, _shared_docs
from pandas.core.indexes.base import Index
from pandas.core.reshape.concat import concat
from pandas.core.tools.numeric import to_numeric
@@ -21,20 +22,21 @@
% dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt")
)
def melt(
frame,
frame: DataFrame,
id_vars=None,
value_vars=None,
var_name=None,
value_name="value",
col_level=None,
):
) -> DataFrame:
# TODO: what about the existing index?
# If multiindex, gather names of columns on all level for checking presence
# of `id_vars` and `value_vars`
if isinstance(frame.columns, ABCMultiIndex):
cols = [x for c in frame.columns for x in c]
else:
cols = list(frame.columns)

if id_vars is not None:
if not is_list_like(id_vars):
id_vars = [id_vars]
@@ -119,7 +121,7 @@ def melt(
return frame._constructor(mdata, columns=mcolumns)


def lreshape(data, groups, dropna=True, label=None):
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
"""
Reshape long-format data to wide. Generalized inverse of DataFrame.pivot
@@ -129,6 +131,8 @@ def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
groups : dict
{new_name : list_of_columns}
dropna : boolean, default True
label : object, default None
Dummy kwarg, not used.
Examples
--------
@@ -188,7 +192,7 @@ def lreshape(data: DataFrame, groups, dropna=True, label=None):
return data._constructor(mdata, columns=id_cols + pivot_cols)


def wide_to_long(df, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
r"""
Wide panel to long format. Less flexible but more user-friendly than melt.
@@ -412,14 +416,14 @@ def wide_to_long(df: DataFrame, stubnames, i, j, sep: str = "", suffix: str = r"\d+"):
two 2.9
"""

def get_var_names(df, stub, sep, suffix):
def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]:
regex = r"^{stub}{sep}{suffix}$".format(
stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
)
pattern = re.compile(regex)
return [col for col in df.columns if pattern.match(col)]

def melt_stub(df, stub, i, j, value_vars, sep: str):
def melt_stub(df, stub: str, i, j, value_vars, sep: str):
newdf = melt(
df,
id_vars=i,
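
A closing note on this file: the stub-matching rule that the newly annotated `get_var_names` builds is the single regex shown in the diff above. A small standalone sketch of it (the sample column names are invented for illustration):

import re

def get_var_names(columns, stub: str, sep: str, suffix: str):
    # Same construction as the helper inside wide_to_long: stub and sep are
    # escaped literally, suffix stays a raw regex (default r"\d+").
    regex = r"^{stub}{sep}{suffix}$".format(
        stub=re.escape(stub), sep=re.escape(sep), suffix=suffix
    )
    pattern = re.compile(regex)
    return [col for col in columns if pattern.match(col)]

print(get_var_names(["A_1", "A_2", "A_x", "B_1", "famid"], "A", "_", r"\d+"))
# -> ['A_1', 'A_2']  ("A_x" fails the \d+ suffix, "B_1" fails the stub)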
32 changes: 17 additions & 15 deletions pandas/core/reshape/merge.py
@@ -92,7 +92,7 @@ def merge(


def _groupby_and_merge(
by, on, left, right, _merge_pieces, check_duplicates: bool = True
by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True
):
"""
groupby & merge; we are always performing a left-by type operation
@@ -313,7 +313,7 @@ def merge_asof(
suffixes=("_x", "_y"),
tolerance=None,
allow_exact_matches: bool = True,
direction="backward",
direction: str = "backward",
):
"""
Perform an asof merge. This is similar to a left-join except that we
@@ -1299,19 +1299,21 @@ def _get_join_indexers(
right_keys
), "left_key and right_keys must be the same length"

# bind `sort` arg. of _factorize_keys
fkeys = partial(_factorize_keys, sort=sort)

# get left & right join labels and num. of levels at each location
llab, rlab, shape = map(list, zip(*map(fkeys, left_keys, right_keys)))
mapped = (
_factorize_keys(left_keys[n], right_keys[n], sort=sort)
for n in range(len(left_keys))
)
zipped = zip(*mapped)
llab, rlab, shape = [list(x) for x in zipped]

# get flat i8 keys from label lists
lkey, rkey = _get_join_keys(llab, rlab, shape, sort)

# factorize keys to a dense i8 space
# `count` is the num. of unique keys
# set(lkey) | set(rkey) == range(count)
lkey, rkey, count = fkeys(lkey, rkey)
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

# preserve left frame order if how == 'left' and sort == False
kwargs = copy.copy(kwargs)
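
Aside on the `_get_join_indexers` hunk above: binding `sort` with `functools.partial` and mapping the partial is replaced by calling the factorize helper directly with `sort=` at each call site. A rough sketch of why the two spellings are equivalent, using a stand-in for the private `_factorize_keys` (the stand-in only approximates the real helper's behaviour):

from functools import partial

def factorize_keys(lk, rk, sort: bool = True):
    # Stand-in for pandas' private _factorize_keys: map both key arrays onto
    # dense integer codes and report how many unique keys there are.
    uniques = sorted(set(lk) | set(rk)) if sort else list(dict.fromkeys(list(lk) + list(rk)))
    mapping = {v: i for i, v in enumerate(uniques)}
    return [mapping[v] for v in lk], [mapping[v] for v in rk], len(uniques)

left_keys = [["a", "b", "a"], [1, 2, 2]]
right_keys = [["b", "a", "a"], [2, 2, 1]]

# Before: bind `sort` once and map the partial over the key arrays.
fkeys = partial(factorize_keys, sort=True)
old_style = [fkeys(lk, rk) for lk, rk in zip(left_keys, right_keys)]

# After: call the function directly, passing sort= each time -- same result,
# one less indirection to read past.
new_style = [factorize_keys(left_keys[n], right_keys[n], sort=True) for n in range(len(left_keys))]

assert old_style == new_style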
@@ -1487,12 +1489,12 @@ def get_result(self):
return result


def _asof_function(direction):
def _asof_function(direction: str):
name = "asof_join_{dir}".format(dir=direction)
return getattr(libjoin, name, None)


def _asof_by_function(direction):
def _asof_by_function(direction: str):
name = "asof_join_{dir}_on_X_by_Y".format(dir=direction)
return getattr(libjoin, name, None)

@@ -1536,7 +1538,7 @@ def __init__(
how: str = "asof",
tolerance=None,
allow_exact_matches: bool = True,
direction="backward",
direction: str = "backward",
):

self.by = by
@@ -1775,11 +1777,11 @@ def flip(xs):

def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):

# bind `sort` argument
fkeys = partial(_factorize_keys, sort=sort)

# left & right join labels and num. of levels at each location
mapped = (fkeys(index.levels[n], join_keys[n]) for n in range(len(index.levels)))
mapped = (
_factorize_keys(index.levels[n], join_keys[n], sort=sort)
for n in range(index.nlevels)
)
zipped = zip(*mapped)
rcodes, lcodes, shape = [list(x) for x in zipped]
if sort:
@@ -1804,7 +1806,7 @@ def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool):
lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort)

# factorize keys to a dense i8 space
lkey, rkey, count = fkeys(lkey, rkey)
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)

return libjoin.left_outer_join(lkey, rkey, count, sort=sort)
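
The same partial-inlining is applied in `_get_multiindex_indexer`; the other small change is looping over `index.nlevels` instead of `len(index.levels)`, which are the same number for a MultiIndex. A quick check with public API only:

import pandas as pd

mi = pd.MultiIndex.from_product([["x", "y"], [1, 2, 3]], names=["letter", "number"])

# nlevels and len(levels) agree, so the loop bound is unchanged by the edit.
assert mi.nlevels == len(mi.levels) == 2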
