Skip to content

Commit

Permalink
BUG: DataFrame.diff(axis=0) with DatetimeTZ data (pandas-dev#19773)
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored and jreback committed Mar 1, 2018
1 parent c5a1ef1 commit 9242248
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,7 @@ Timezones
- Bug in the :class:`DataFrame` constructor, where a tz-aware :class:`DatetimeIndex` and a given column name would result in an empty ``DataFrame`` (:issue:`19157`)
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
- Bug when iterating over a :class:`DatetimeIndex` localized with a fixed timezone offset, where nanosecond precision was rounded to microseconds (:issue:`19603`)
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)

Offsets
^^^^^^^
Expand Down
29 changes: 29 additions & 0 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2905,6 +2905,35 @@ def shift(self, periods, axis=0, mgr=None):
return [self.make_block_same_class(new_values,
placement=self.mgr_locs)]

def diff(self, n, axis=0, mgr=None):
    """
    Compute the first discrete difference of this block's values.

    Parameters
    ----------
    n : int
        Number of periods to diff.
    axis : int, default 0
        Axis to diff upon. ``axis=0`` is not supported and raises.
    mgr : default None
        Not used by this method; accepted so that the signature mirrors
        ``shift``.

    Returns
    -------
    list
        A single-element list holding a new ``TimeDeltaBlock``.

    Raises
    ------
    NotImplementedError
        If ``axis`` is 0, because the diff would have to be computed
        across multiple blocks while this method is invoked via apply.

    Notes
    -----
    The arguments here mimic those of ``shift`` so that they are called
    correctly by apply.
    """
    if axis == 0:
        # Cannot currently calculate diff across multiple blocks since this
        # function is invoked via apply
        raise NotImplementedError(
            "diff with axis=0 is not supported for this block: it cannot "
            "be computed across multiple blocks when invoked via apply")

    # Difference between the current values and the values shifted by n
    # periods, taken through its ``asi8`` representation.
    shifted = self.shift(n, axis=axis)[0].values
    new_values = (self.values - shifted).asi8

    # Reshape the new_values like how algos.diff does for timedelta data
    new_values = new_values.reshape(1, len(new_values))
    new_values = new_values.astype('timedelta64[ns]')
    return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]

def concat_same_type(self, to_concat, placement=None):
"""
Concatenate list of single blocks of the same type.
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/frame/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,32 @@ def test_diff(self):
1), 'z': pd.Series(1)}).astype('float64')
assert_frame_equal(result, expected)

@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_diff_datetime_axis0(self, tz):
    """Check ``diff(axis=0)`` on naive and tz-aware datetime columns."""
    # GH 18578
    columns = (0, 1)
    df = DataFrame({col: date_range('2010', freq='D', periods=2, tz=tz)
                    for col in columns})

    # Expect NaT in the first row and a one-day delta in the second,
    # for every column.
    expected = DataFrame({col: pd.TimedeltaIndex(['NaT', '1 days'])
                          for col in columns})
    result = df.diff(axis=0)
    assert_frame_equal(result, expected)

@pytest.mark.parametrize('tz', [None, 'UTC'])
def test_diff_datetime_axis1(self, tz):
    """Check ``diff(axis=1)``: naive data diffs, tz-aware data raises."""
    # GH 18578
    df = DataFrame({col: date_range('2010', freq='D', periods=2, tz=tz)
                    for col in (0, 1)})

    if tz is not None:
        # tz-aware columns are expected to raise for a column-wise diff
        with pytest.raises(NotImplementedError):
            df.diff(axis=1)
        return

    # Naive case: expect NaT throughout column 0 and zero deltas in
    # column 1.
    expected = DataFrame({0: pd.TimedeltaIndex(['NaT', 'NaT']),
                          1: pd.TimedeltaIndex(['0 days', '0 days'])})
    result = df.diff(axis=1)
    assert_frame_equal(result, expected)

def test_diff_timedelta(self):
# GH 4533
df = DataFrame(dict(time=[Timestamp('20130101 9:01'),
Expand Down

0 comments on commit 9242248

Please sign in to comment.