Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport PR #45283 on branch 1.4.x (BUG: do not replace all nulls with "NaN"-string in Series index) #45473

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,7 @@ Other
- Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`)
- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`)
- Bug in :meth:`DataFrame.eval` where ``resolvers`` argument was overriding the default resolvers (:issue:`34966`)
- :meth:`Series.__repr__` and :meth:`DataFrame.__repr__` no longer replace all null-values in indexes with "NaN" but use their real string-representations. "NaN" is used only for ``float("nan")`` (:issue:`45263`)

.. ---------------------------------------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/missing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
25 changes: 25 additions & 0 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,31 @@ cdef bint checknull_with_nat_and_na(object obj):
return checknull_with_nat(obj) or obj is C_NA


@cython.wraparound(False)
@cython.boundscheck(False)
def is_float_nan(values: ndarray) -> ndarray:
"""
True for elements which correspond to a float nan

Returns
-------
ndarray[bool]
"""
cdef:
ndarray[uint8_t] result
Py_ssize_t i, N
object val

N = len(values)
result = np.zeros(N, dtype=np.uint8)

for i in range(N):
val = values[i]
if util.is_nan(val):
result[i] = True
return result.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def is_numeric_na(values: ndarray) -> ndarray:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
is_datetime_array,
no_default,
)
from pandas._libs.missing import is_float_nan
from pandas._libs.tslibs import (
IncompatibleFrequency,
OutOfBoundsDatetime,
Expand Down Expand Up @@ -1391,7 +1392,7 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]
result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

# could have nans
mask = isna(values)
mask = is_float_nan(values)
if mask.any():
result_arr = np.array(result)
result_arr[mask] = na_rep
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/frame/test_repr_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,27 @@ def test_repr_with_mi_nat(self, float_string_frame):
expected = " X\nNaT a 1\n2013-01-01 b 2"
assert result == expected

def test_repr_with_different_nulls(self):
# GH45263
df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT])
result = repr(df)
expected = """ 0
True 1
None 2
NaN 3
NaT 4"""
assert result == expected

def test_repr_with_different_nulls_cols(self):
# GH45263
d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]}
df = DataFrame(data=d)
result = repr(df)
expected = """ NaN None NaT True
0 1 3 6 8
1 2 4 7 9"""
assert result == expected

def test_multiindex_na_repr(self):
# only an issue with long columns
df3 = DataFrame(
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,8 @@ def test_format_missing(self, vals, nulls_fixture):
index = Index(vals)

formatted = index.format()
expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"]
null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture)
expected = [str(index[0]), str(index[1]), str(index[2]), null_repr]

assert formatted == expected
assert index[3] is nulls_fixture
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/series/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,13 @@ def test_float_repr(self):
expected = "0 1.0\ndtype: object"
assert repr(ser) == expected

def test_different_null_objects(self):
# GH#45263
ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT])
result = repr(ser)
expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
assert result == expected


class TestCategoricalRepr:
def test_categorical_repr_unicode(self):
Expand Down