Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: replace _interleaved_dtype with _find_common_type #15765

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ Bug Fixes


- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`)
- Bug in ``.replace()`` may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`)

- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`)

Expand Down
59 changes: 9 additions & 50 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

from pandas.core.base import PandasObject

from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
from pandas.types.dtypes import (ExtensionDtype, DatetimeTZDtype,
CategoricalDtype)
from pandas.types.common import (_TD_DTYPE, _NS_DTYPE,
_ensure_int64, _ensure_platform_int,
is_integer,
Expand Down Expand Up @@ -4496,55 +4497,13 @@ def _interleaved_dtype(blocks):
if not len(blocks):
return None

counts = defaultdict(list)
for x in blocks:
counts[type(x)].append(x)

have_int = len(counts[IntBlock]) > 0
have_bool = len(counts[BoolBlock]) > 0
have_object = len(counts[ObjectBlock]) > 0
have_float = len(counts[FloatBlock]) > 0
have_complex = len(counts[ComplexBlock]) > 0
have_dt64 = len(counts[DatetimeBlock]) > 0
have_dt64_tz = len(counts[DatetimeTZBlock]) > 0
have_td64 = len(counts[TimeDeltaBlock]) > 0
have_cat = len(counts[CategoricalBlock]) > 0
# TODO: have_sparse is not used
have_sparse = len(counts[SparseBlock]) > 0 # noqa
have_numeric = have_float or have_complex or have_int
has_non_numeric = have_dt64 or have_dt64_tz or have_td64 or have_cat

if (have_object or
(have_bool and
(have_numeric or have_dt64 or have_dt64_tz or have_td64)) or
(have_numeric and has_non_numeric) or have_cat or have_dt64 or
have_dt64_tz or have_td64):
return np.dtype(object)
elif have_bool:
return np.dtype(bool)
elif have_int and not have_float and not have_complex:
# if we are mixing unsigned and signed, then return
# the next biggest int type (if we can)
lcd = _find_common_type([b.dtype for b in counts[IntBlock]])
kinds = set([i.dtype.kind for i in counts[IntBlock]])
if len(kinds) == 1:
return lcd

if lcd == 'uint64' or lcd == 'int64':
return np.dtype('int64')

# return 1 bigger on the itemsize if unsigned
if lcd.kind == 'u':
return np.dtype('int%s' % (lcd.itemsize * 8 * 2))
return lcd

elif have_int and have_float and not have_complex:
return np.dtype('float64')
elif have_complex:
return np.dtype('c16')
else:
introspection_blks = counts[FloatBlock] + counts[SparseBlock]
return _find_common_type([b.dtype for b in introspection_blks])
dtype = _find_common_type([b.dtype for b in blocks])

# only numpy compat
if isinstance(dtype, ExtensionDtype):
dtype = np.object

return dtype


def _consolidate(blocks):
Expand Down
14 changes: 2 additions & 12 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1183,19 +1183,9 @@ def _assert_replace_conversion(self, from_key, to_key, how):

result = obj.replace(replacer)

# buggy on windows for bool/int64
if (from_key == 'bool' and
to_key == 'int64' and
tm.is_platform_windows()):
pytest.skip("windows platform buggy: {0} -> {1}".format
(from_key, to_key))

if ((from_key == 'float64' and to_key in ('bool', 'int64')) or
if ((from_key == 'float64' and to_key in ('int64')) or
(from_key == 'complex128' and
to_key in ('bool', 'int64', 'float64')) or

# GH12747 The result must be int?
(from_key == 'int64' and to_key in ('bool'))):
to_key in ('int64', 'float64'))):

# buggy on 32-bit
if tm.is_platform_32bit():
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ def check_replace(to_rep, val, expected):
tr, v = [3, 4], [3.5, pd.Timestamp('20130101')]
check_replace(tr, v, e)

# casts to float
e = pd.Series([0, 1, 2, 3.5, 1])
# casts to object
e = pd.Series([0, 1, 2, 3.5, True], dtype='object')
tr, v = [3, 4], [3.5, True]
check_replace(tr, v, e)

Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/types/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,20 @@ def test_numpy_dtypes(self):
((np.object, np.float32), np.object),
((np.object, np.int16), np.object),

# bool with int
((np.dtype('bool'), np.int64), np.object),
((np.dtype('bool'), np.int32), np.object),
((np.dtype('bool'), np.int16), np.object),
((np.dtype('bool'), np.int8), np.object),
((np.dtype('bool'), np.uint64), np.object),
((np.dtype('bool'), np.uint32), np.object),
((np.dtype('bool'), np.uint16), np.object),
((np.dtype('bool'), np.uint8), np.object),

# bool with float
((np.dtype('bool'), np.float64), np.object),
((np.dtype('bool'), np.float32), np.object),

((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
np.dtype('datetime64[ns]')),
((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),
Expand Down
28 changes: 27 additions & 1 deletion pandas/types/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,12 +892,28 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):


def _find_common_type(types):
"""Find a common data type among the given dtypes."""
"""
Find a common data type among the given dtypes.

Parameters
----------
types : list of dtypes

Returns
-------
pandas extension or numpy dtype

See Also
--------
numpy.find_common_type

"""

if len(types) == 0:
raise ValueError('no types given')

first = types[0]

# workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
# => object
if all(is_dtype_equal(first, t) for t in types[1:]):
Expand All @@ -912,4 +928,14 @@ def _find_common_type(types):
if all(is_timedelta64_dtype(t) for t in types):
return np.dtype('timedelta64[ns]')

# don't mix bool with int, float, or complex
# this is different from numpy, which promotes bool to the other numeric type (int/float/complex)
has_bools = any(is_bool_dtype(t) for t in types)
if has_bools:
has_ints = any(is_integer_dtype(t) for t in types)
has_floats = any(is_float_dtype(t) for t in types)
has_complex = any(is_complex_dtype(t) for t in types)
if has_ints or has_floats or has_complex:
return np.object

return np.find_common_type(types, [])