Skip to content

Commit

Permalink
BUG: Formatting of an index that has nan was inconsistent or wron…
Browse files Browse the repository at this point in the history
…g (would fill from

     other values), (GH2850_)

BUG: issue in test_index.py/test_format
       1) printing of 'nan' rather than the na_rep (NaN) is inconsistent
          with everywhere else
       2) a 'None' in the index is de facto treated as NaN; is this wrong?

CLN: consistency among index for NaN/NaT values
  • Loading branch information
jreback committed Mar 13, 2013
1 parent a79f08c commit aae6213
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 11 deletions.
3 changes: 3 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ pandas 0.11.0
- Bug in DataFrame column insertion when the column creation fails, existing frame is left in
an irrecoverable state (GH3010_)
- Bug in DataFrame update where non-specified values could cause dtype changes (GH3016_)
- Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from
other values), (GH2850_)

.. _GH622: https://github.com/pydata/pandas/issues/622
.. _GH797: https://github.com/pydata/pandas/issues/797
Expand All @@ -161,6 +163,7 @@ pandas 0.11.0
.. _GH2867: https://github.com/pydata/pandas/issues/2867
.. _GH2807: https://github.com/pydata/pandas/issues/2807
.. _GH2849: https://github.com/pydata/pandas/issues/2849
.. _GH2850: https://github.com/pydata/pandas/issues/2850
.. _GH2898: https://github.com/pydata/pandas/issues/2898
.. _GH2892: https://github.com/pydata/pandas/issues/2892
.. _GH2909: https://github.com/pydata/pandas/issues/2909
Expand Down
38 changes: 30 additions & 8 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,9 @@ def __unicode__(self):
Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
"""
if len(self) > 6 and len(self) > np.get_printoptions()['threshold']:
data = self[:3].tolist() + ["..."] + self[-3:].tolist()
data = self[:3].format() + ["..."] + self[-3:].format()
else:
data = self
data = self.format()

prepr = com.pprint_thing(data, escape_chars=('\t', '\r', '\n'))
return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype)
Expand Down Expand Up @@ -247,8 +247,14 @@ def _has_complex_internals(self):

def summary(self, name=None):
if len(self) > 0:
index_summary = ', %s to %s' % (com.pprint_thing(self[0]),
com.pprint_thing(self[-1]))
head = self[0]
if hasattr(head,'format'):
head = head.format()
tail = self[-1]
if hasattr(tail,'format'):
tail = tail.format()
index_summary = ', %s to %s' % (com.pprint_thing(head),
com.pprint_thing(tail))
else:
index_summary = ''

Expand Down Expand Up @@ -419,7 +425,7 @@ def take(self, indexer, axis=0):
taken = self.view(np.ndarray).take(indexer)
return self._constructor(taken, name=self.name)

def format(self, name=False, formatter=None):
def format(self, name=False, formatter=None, na_rep='NaN'):
"""
Render a string representation of the Index
"""
Expand Down Expand Up @@ -454,6 +460,14 @@ def format(self, name=False, formatter=None):
if values.dtype == np.object_:
result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
for x in values]

# could have nans
mask = isnull(values)
if mask.any():
result = np.array(result)
result[mask] = na_rep
result = result.tolist()

else:
result = _trim_front(format_array(values, None, justify='left'))
return header + result
Expand Down Expand Up @@ -1446,10 +1460,9 @@ def __unicode__(self):
np.set_printoptions(threshold=50)

if len(self) > 100:
values = np.concatenate([self[:50].values,
self[-50:].values])
values = self[:50].format() + self[-50:].format()
else:
values = self.values
values = self.format()

summary = com.pprint_thing(values, escape_chars=('\t', '\r', '\n'))

Expand Down Expand Up @@ -1618,7 +1631,16 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
stringified_levels = []
for lev, lab in zip(self.levels, self.labels):
if len(lev) > 0:

formatted = lev.take(lab).format(formatter=formatter)

# we have some NA
mask = lab==-1
if mask.any():
formatted = np.array(formatted)
formatted[mask] = na_rep
formatted = formatted.tolist()

else:
# weird all NA case
formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
Expand Down
33 changes: 32 additions & 1 deletion pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,31 @@ def test_long_series(self):
nmatches = len(re.findall('dtype',str_rep))
self.assert_(nmatches == 1)

def test_index_with_nan(self):
# GH 2850
df = DataFrame({'id1': {0: '1a3', 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
'id3': {0: '78d', 1: '79d'}, 'value': {0: 123, 1: 64}})

# multi-index
y = df.set_index(['id1', 'id2', 'id3'])
result = y.to_string()
expected = u' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64'
self.assert_(result == expected)

# index
y = df.set_index('id2')
result = y.to_string()
expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64'
self.assert_(result == expected)

# all-nan in mi
df2 = df.copy()
df2.ix[:,'id2'] = np.nan
y = df2.set_index('id2')
result = y.to_string()
expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64'
self.assert_(result == expected)

def test_to_string(self):
from pandas import read_table
import re
Expand Down Expand Up @@ -1234,10 +1259,16 @@ def test_datetimeindex(self):
result = s.to_string()
self.assertTrue('2013-01-02' in result)

s = Series(2, index=[ Timestamp('20130111'), NaT ]).append(s)
# nat in index
s2 = Series(2, index=[ Timestamp('20130111'), NaT ])
s = s2.append(s)
result = s.to_string()
self.assertTrue('NaT' in result)

# nat in summary
result = str(s2.index)
self.assertTrue('NaT' in result)

def test_timedelta64(self):

from pandas import date_range
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,12 +351,13 @@ def test_format(self):
# 2845
index = Index([1, 2.0+3.0j, np.nan])
formatted = index.format()
expected = [str(index[0]), str(index[1]), str(index[2])]
expected = [str(index[0]), str(index[1]), u'NaN']
self.assertEquals(formatted, expected)

# is this really allowed?
index = Index([1, 2.0+3.0j, None])
formatted = index.format()
expected = [str(index[0]), str(index[1]), '']
expected = [str(index[0]), str(index[1]), u'NaN']
self.assertEquals(formatted, expected)

self.strIndex[:0].format()
Expand Down

0 comments on commit aae6213

Please sign in to comment.