From aae62135fe7f795c6218de432e9409d25d02e627 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 12 Mar 2013 21:04:25 -0400 Subject: [PATCH] BUG: Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from other values), (GH2850_) BUG: issue in test_index.py/test_format 1) printing of 'nan' rather than the na_rep (NaN) is inconsistent with everywhere else 2) a 'None' in the index is de facto treated as NaN, is this wrong? CLN: consistency among index for NaN/NaT values --- RELEASE.rst | 3 +++ pandas/core/index.py | 38 +++++++++++++++++++++++++++++-------- pandas/tests/test_format.py | 33 +++++++++++++++++++++++++++++++- pandas/tests/test_index.py | 5 +++-- 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index d79ede4dad26e..94bcf6cb7b187 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -145,6 +145,8 @@ pandas 0.11.0 - Bug in DataFrame column insertion when the column creation fails, existing frame is left in an irrecoverable state (GH3010_) - Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_) + - Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from + other values), (GH2850_) .. _GH622: https://github.com/pydata/pandas/issues/622 .. _GH797: https://github.com/pydata/pandas/issues/797 @@ -161,6 +163,7 @@ pandas 0.11.0 .. _GH2867: https://github.com/pydata/pandas/issues/2867 .. _GH2807: https://github.com/pydata/pandas/issues/2807 .. _GH2849: https://github.com/pydata/pandas/issues/2849 +.. _GH2850: https://github.com/pydata/pandas/issues/2850 .. _GH2898: https://github.com/pydata/pandas/issues/2898 .. _GH2892: https://github.com/pydata/pandas/issues/2892 .. _GH2909: https://github.com/pydata/pandas/issues/2909 diff --git a/pandas/core/index.py b/pandas/core/index.py index 42fe1c4ccb928..0f9776e202c00 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -173,9 +173,9 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. 
Yields a Unicode String in both py2/py3. """ if len(self) > 6 and len(self) > np.get_printoptions()['threshold']: - data = self[:3].tolist() + ["..."] + self[-3:].tolist() + data = self[:3].format() + ["..."] + self[-3:].format() else: - data = self + data = self.format() prepr = com.pprint_thing(data, escape_chars=('\t', '\r', '\n')) return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype) @@ -247,8 +247,14 @@ def _has_complex_internals(self): def summary(self, name=None): if len(self) > 0: - index_summary = ', %s to %s' % (com.pprint_thing(self[0]), - com.pprint_thing(self[-1])) + head = self[0] + if hasattr(head,'format'): + head = head.format() + tail = self[-1] + if hasattr(tail,'format'): + tail = tail.format() + index_summary = ', %s to %s' % (com.pprint_thing(head), + com.pprint_thing(tail)) else: index_summary = '' @@ -419,7 +425,7 @@ def take(self, indexer, axis=0): taken = self.view(np.ndarray).take(indexer) return self._constructor(taken, name=self.name) - def format(self, name=False, formatter=None): + def format(self, name=False, formatter=None, na_rep='NaN'): """ Render a string representation of the Index """ @@ -454,6 +460,14 @@ def format(self, name=False, formatter=None): if values.dtype == np.object_: result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) for x in values] + + # could have nans + mask = isnull(values) + if mask.any(): + result = np.array(result) + result[mask] = na_rep + result = result.tolist() + else: result = _trim_front(format_array(values, None, justify='left')) return header + result @@ -1446,10 +1460,9 @@ def __unicode__(self): np.set_printoptions(threshold=50) if len(self) > 100: - values = np.concatenate([self[:50].values, - self[-50:].values]) + values = self[:50].format() + self[-50:].format() else: - values = self.values + values = self.format() summary = com.pprint_thing(values, escape_chars=('\t', '\r', '\n')) @@ -1618,7 +1631,16 @@ def format(self, space=2, sparsify=None, adjoin=True, 
names=False, stringified_levels = [] for lev, lab in zip(self.levels, self.labels): if len(lev) > 0: + formatted = lev.take(lab).format(formatter=formatter) + + # we have some NA + mask = lab==-1 + if mask.any(): + formatted = np.array(formatted) + formatted[mask] = na_rep + formatted = formatted.tolist() + else: # weird all NA case formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 1b436bfd443fc..d32a50a37f667 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -603,6 +603,31 @@ def test_long_series(self): nmatches = len(re.findall('dtype',str_rep)) self.assert_(nmatches == 1) + def test_index_with_nan(self): + # GH 2850 + df = DataFrame({'id1': {0: '1a3', 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'}, + 'id3': {0: '78d', 1: '79d'}, 'value': {0: 123, 1: 64}}) + + # multi-index + y = df.set_index(['id1', 'id2', 'id3']) + result = y.to_string() + expected = u' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64' + self.assert_(result == expected) + + # index + y = df.set_index('id2') + result = y.to_string() + expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64' + self.assert_(result == expected) + + # all-nan in mi + df2 = df.copy() + df2.ix[:,'id2'] = np.nan + y = df2.set_index('id2') + result = y.to_string() + expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64' + self.assert_(result == expected) + def test_to_string(self): from pandas import read_table import re @@ -1234,10 +1259,16 @@ def test_datetimeindex(self): result = s.to_string() self.assertTrue('2013-01-02' in result) - s = Series(2, index=[ Timestamp('20130111'), NaT ]).append(s) + # nat in index + s2 = Series(2, index=[ Timestamp('20130111'), NaT ]) + s = s2.append(s) result = s.to_string() self.assertTrue('NaT' in result) + # nat in summary + result = str(s2.index) + self.assertTrue('NaT' in result) + def test_timedelta64(self): from pandas import date_range 
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index a5732f252d617..aad2a7d988890 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -351,12 +351,13 @@ def test_format(self): # 2845 index = Index([1, 2.0+3.0j, np.nan]) formatted = index.format() - expected = [str(index[0]), str(index[1]), str(index[2])] + expected = [str(index[0]), str(index[1]), u'NaN'] self.assertEquals(formatted, expected) + # is this really allowed? index = Index([1, 2.0+3.0j, None]) formatted = index.format() - expected = [str(index[0]), str(index[1]), ''] + expected = [str(index[0]), str(index[1]), u'NaN'] self.assertEquals(formatted, expected) self.strIndex[:0].format()