diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 485ad4cedcacb..bb93ce1a12b2a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -1050,6 +1050,7 @@ Other - Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`) - Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`) - Bug in :meth:`DataFrame.eval` where ``resolvers`` argument was overriding the default resolvers (:issue:`34966`) +- :meth:`Series.__repr__` and :meth:`DataFrame.__repr__` no longer replace all null values in indexes with "NaN" but use their real string representations; "NaN" is now used only for ``float("nan")`` (:issue:`45263`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi index ab6841e7ddf35..3a4cc9def07bd 100644 --- a/pandas/_libs/missing.pyi +++ b/pandas/_libs/missing.pyi @@ -14,3 +14,4 @@ def checknull(val: object, inf_as_na: bool = ...) -> bool: ... def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ... def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ... +def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ... 
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 585b535775397..62977f0fd2b4c 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -248,6 +248,31 @@ cdef bint checknull_with_nat_and_na(object obj): return checknull_with_nat(obj) or obj is C_NA +@cython.wraparound(False) +@cython.boundscheck(False) +def is_float_nan(values: ndarray) -> ndarray: + """ + True for elements which correspond to a float nan + + Returns + ------- + ndarray[bool] + """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if util.is_nan(val): + result[i] = True + return result.view(bool) + + @cython.wraparound(False) @cython.boundscheck(False) def is_numeric_na(values: ndarray) -> ndarray: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 22ab2bff4cb09..af295384690e9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -33,6 +33,7 @@ is_datetime_array, no_default, ) +from pandas._libs.missing import is_float_nan from pandas._libs.tslibs import ( IncompatibleFrequency, OutOfBoundsDatetime, @@ -1391,7 +1392,7 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t] result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] # could have nans - mask = isna(values) + mask = is_float_nan(values) if mask.any(): result_arr = np.array(result) result_arr[mask] = na_rep diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index e71f625a6ead6..f19edf5722ca1 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -68,6 +68,27 @@ def test_repr_with_mi_nat(self, float_string_frame): expected = " X\nNaT a 1\n2013-01-01 b 2" assert result == expected + def test_repr_with_different_nulls(self): + # GH45263 + df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT]) + result = repr(df) + 
expected = """ 0 +True 1 +None 2 +NaN 3 +NaT 4""" + assert result == expected + + def test_repr_with_different_nulls_cols(self): + # GH45263 + d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]} + df = DataFrame(data=d) + result = repr(df) + expected = """ NaN None NaT True +0 1 3 6 8 +1 2 4 7 9""" + assert result == expected + def test_multiindex_na_repr(self): # only an issue with long columns df3 = DataFrame( diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 3447a2ceef7c1..1145de14ad3c4 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -658,7 +658,8 @@ def test_format_missing(self, vals, nulls_fixture): index = Index(vals) formatted = index.format() - expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"] + null_repr = "NaN" if isinstance(nulls_fixture, float) else str(nulls_fixture) + expected = [str(index[0]), str(index[1]), str(index[2]), null_repr] assert formatted == expected assert index[3] is nulls_fixture diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index e243f609145f3..a12bc1df37269 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -258,6 +258,13 @@ def test_float_repr(self): expected = "0 1.0\ndtype: object" assert repr(ser) == expected + def test_different_null_objects(self): + # GH#45263 + ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT]) + result = repr(ser) + expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64" + assert result == expected + class TestCategoricalRepr: def test_categorical_repr_unicode(self):