Skip to content

Commit

Permalink
Backport PR #45283: BUG: do not replace all nulls with "NaN"-string in Series index (#45473)
Browse files Browse the repository at this point in the history

Co-authored-by: realead <egor.dranischnikow@googlemail.com>
  • Loading branch information
meeseeksmachine and realead authored Jan 19, 2022
1 parent 4d10b87 commit aaba0ef
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,7 @@ Other
- Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`)
- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`)
- Bug in :meth:`DataFrame.eval` where ``resolvers`` argument was overriding the default resolvers (:issue:`34966`)
- :meth:`Series.__repr__` and :meth:`DataFrame.__repr__` no longer replace all null-values in indexes with "NaN" but use their real string-representations. "NaN" is used only for ``float("nan")`` (:issue:`45263`)

.. ---------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
# Element-wise null checks: per the annotations, each takes an ndarray and
# returns a boolean ndarray.
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
# Marks the elements that correspond to a float nan.
def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
25 changes: 25 additions & 0 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,31 @@ cdef bint checknull_with_nat_and_na(object obj):
return checknull_with_nat(obj) or obj is C_NA


@cython.wraparound(False)
@cython.boundscheck(False)
def is_float_nan(values: ndarray) -> ndarray:
    """
    Flag the elements of ``values`` which correspond to a float nan.

    Parameters
    ----------
    values : ndarray

    Returns
    -------
    ndarray[bool]
    """
    cdef:
        Py_ssize_t idx, n = len(values)
        # Filled as uint8 and reinterpreted as bool on return.
        ndarray[uint8_t] mask = np.zeros(n, dtype=np.uint8)
        object item

    for idx in range(n):
        item = values[idx]
        if util.is_nan(item):
            mask[idx] = True
    return mask.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def is_numeric_na(values: ndarray) -> ndarray:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
is_datetime_array,
no_default,
)
from pandas._libs.missing import is_float_nan
from pandas._libs.tslibs import (
IncompatibleFrequency,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -1391,7 +1392,7 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]
result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

# could have nans
mask = isna(values)
mask = is_float_nan(values)
if mask.any():
result_arr = np.array(result)
result_arr[mask] = na_rep
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,27 @@ def test_repr_with_mi_nat(self, float_string_frame):
expected = " X\nNaT a 1\n2013-01-01 b 2"
assert result == expected

def test_repr_with_different_nulls(self):
# GH45263
df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT])
result = repr(df)
expected = """ 0
True 1
None 2
NaN 3
NaT 4"""
assert result == expected

def test_repr_with_different_nulls_cols(self):
# GH45263
d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]}
df = DataFrame(data=d)
result = repr(df)
expected = """ NaN None NaT True
0 1 3 6 8
1 2 4 7 9"""
assert result == expected

def test_multiindex_na_repr(self):
# only an issue with long columns
df3 = DataFrame(
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,8 @@ def test_format_missing(self, vals, nulls_fixture):
index = Index(vals)

formatted = index.format()
expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"]
null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture)
expected = [str(index[0]), str(index[1]), str(index[2]), null_repr]

assert formatted == expected
assert index[3] is nulls_fixture
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/series/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,13 @@ def test_float_repr(self):
expected = "0 1.0\ndtype: object"
assert repr(ser) == expected

def test_different_null_objects(self):
# GH#45263
ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT])
result = repr(ser)
expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
assert result == expected


class TestCategoricalRepr:
def test_categorical_repr_unicode(self):
Expand Down

0 comments on commit aaba0ef

Please sign in to comment.