Skip to content

Commit

Permalink
BUG: Series.asof fails for all NaN Series (GH15713)
Browse files Browse the repository at this point in the history
closes bug #15713

Added tests for the case where the Series is all NaN, and added code that
checks for that case and, if so, returns the expected all-NaN output.

Author: Carlos Souza <carlos@udacity.com>

Closes #15758 from ucals/bug-fix-15713 and squashes the following commits:

0765108 [Carlos Souza] First simplification, code-block in the same place
bb63964 [Carlos Souza] Propagating Series name
af9a29b [Carlos Souza] Setting name of asof result when scalar input and all nan
b8f078a [Carlos Souza] Small code standard change
7448b96 [Carlos Souza] Fixing scalar input
a080b9b [Carlos Souza] Making scalar input return in a Series
04b7306 [Carlos Souza] Removing .values and formating code PEP8
3f9c7fd [Carlos Souza] Minor comments
70c958f [Carlos Souza] Added tests for non-default indexes, scalar and multiple inputs, and results preserve columns
6b745af [Carlos Souza] Adding DataFrame tests & support, and optimizing the code
89fb6cf [Carlos Souza] BUG #15713 fixing failing tests
17d1d77 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans!
4e26ab8 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans.
c78d687 [Carlos Souza] BUG #15713 Series.asof return nan when series is all nans
676a4e5 [Carlos Souza] Test
  • Loading branch information
Carlos Souza authored and jreback committed Mar 26, 2017
1 parent 22f9d0d commit d2f32a0
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 11 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,7 @@ Bug Fixes
- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`)
- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`)
- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`)
- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`)

- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`)

Expand Down
10 changes: 10 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3972,6 +3972,16 @@ def asof(self, where, subset=None):
where = Index(where) if is_list else Index([where])

nulls = self.isnull() if is_series else self[subset].isnull().any(1)
if nulls.all():
if is_series:
return self._constructor(np.nan, index=where, name=self.name)
elif is_list:
from pandas import DataFrame
return DataFrame(np.nan, index=where, columns=self.columns)
else:
from pandas import Series
return Series(np.nan, index=self.columns, name=where[0])

locs = self.index.asof_locs(where, ~(nulls.values))

# mask the missing
Expand Down
47 changes: 36 additions & 11 deletions pandas/tests/frame/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,19 @@
from pandas import (DataFrame, date_range, Timestamp, Series,
to_datetime)

from pandas.util.testing import assert_frame_equal, assert_series_equal
import pandas.util.testing as tm

from .common import TestData


class TestFrameAsof(TestData, tm.TestCase):

def setUp(self):
self.N = N = 50
rng = date_range('1/1/1990', periods=N, freq='53s')
self.rng = date_range('1/1/1990', periods=N, freq='53s')
self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
index=rng)
index=self.rng)

def test_basic(self):

df = self.df.copy()
df.loc[15:30, 'A'] = np.nan
dates = date_range('1/1/1990', periods=self.N * 3,
Expand All @@ -39,7 +36,6 @@ def test_basic(self):
self.assertTrue((rs == 14).all(1).all())

def test_subset(self):

N = 10
rng = date_range('1/1/1990', periods=N, freq='53s')
df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
Expand All @@ -51,19 +47,19 @@ def test_subset(self):
# with a subset of A should be the same
result = df.asof(dates, subset='A')
expected = df.asof(dates)
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

# same with A/B
result = df.asof(dates, subset=['A', 'B'])
expected = df.asof(dates)
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

# B gives self.df.asof
result = df.asof(dates, subset='B')
expected = df.resample('25s', closed='right').ffill().reindex(dates)
expected.iloc[20:] = 9

assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_missing(self):
# GH 15118
Expand All @@ -75,9 +71,38 @@ def test_missing(self):
result = df.asof('1989-12-31')

expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31'))
assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)

result = df.asof(to_datetime(['1989-12-31']))
expected = DataFrame(index=to_datetime(['1989-12-31']),
columns=['A', 'B'], dtype='float64')
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_all_nans(self):
    """``asof`` on an all-NaN DataFrame yields all-NaN output (GH 15713)."""
    # Smallest case: a single NaN cell, default index, list-like ``where``.
    single_cell_result = DataFrame([np.nan]).asof([0])
    tm.assert_frame_equal(single_cell_result, DataFrame([np.nan]))

    # Non-default (datetime) index with many lookup points, checked for
    # both a single column and several columns; columns must be preserved.
    lookup_times = date_range('1/1/1990', periods=self.N * 3, freq='25s')
    for column_labels in (['A'], ['A', 'B', 'C']):
        nan_frame = DataFrame(np.nan, index=self.rng,
                              columns=column_labels)
        wanted = DataFrame(np.nan, index=lookup_times,
                           columns=column_labels)
        tm.assert_frame_equal(nan_frame.asof(lookup_times), wanted)

    frame_kwargs = dict(index=[1, 2], columns=['A', 'B'])

    # A one-element list is still list-like: the result stays a DataFrame.
    listlike_result = DataFrame(np.nan, **frame_kwargs).asof([3])
    wanted_frame = DataFrame(np.nan, index=[3], columns=['A', 'B'])
    tm.assert_frame_equal(listlike_result, wanted_frame)

    # A true scalar gives a Series indexed by the columns and named
    # after the scalar.
    scalar_result = DataFrame(np.nan, **frame_kwargs).asof(3)
    wanted_series = Series(np.nan, index=['A', 'B'], name=3)
    tm.assert_series_equal(scalar_result, wanted_series)
26 changes: 26 additions & 0 deletions pandas/tests/series/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,29 @@ def test_errors(self):
s = Series(np.random.randn(N), index=rng)
with self.assertRaises(ValueError):
s.asof(s.index[0], subset='foo')

def test_all_nans(self):
    """``asof`` on an all-NaN Series yields NaN results (GH 15713)."""
    # Smallest case: one NaN value, default index, list-like ``where``.
    tm.assert_series_equal(Series([np.nan]).asof([0]), Series([np.nan]))

    # Non-default (datetime) index looked up at a denser set of times.
    n_points = 50
    source_index = date_range('1/1/1990', periods=n_points, freq='53s')
    lookup_times = date_range('1/1/1990', periods=n_points * 3, freq='25s')
    nan_series = Series(np.nan, index=source_index)

    listlike_result = nan_series.asof(lookup_times)
    tm.assert_series_equal(listlike_result,
                           Series(np.nan, index=lookup_times))

    # Scalar ``where``: the result must be null.
    scalar_result = nan_series.asof(lookup_times[0])
    assert isnull(scalar_result)

    # The Series name must carry over to the result.
    named = Series(np.nan, index=[1, 2, 3, 4], name='test')
    wanted = Series(np.nan, index=[4, 5], name='test')
    tm.assert_series_equal(named.asof([4, 5]), wanted)

0 comments on commit d2f32a0

Please sign in to comment.