Skip to content

Commit

Permalink
BUG: fix mutation of DTI backing Series/DataFrame (pandas-dev#24096)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Dec 5, 2018
1 parent aead29b commit cb862e4
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 2 deletions.
4 changes: 3 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2923,7 +2923,9 @@ def _try_coerce_result(self, result):
# allow passing of > 1dim if it's trivial
if result.ndim > 1:
result = result.reshape(np.prod(result.shape))
result = self.values._shallow_copy(result)

# GH#24096 setting new values invalidates the existing freq
result = self.values._shallow_copy(result, freq=None)

return result

Expand Down
8 changes: 8 additions & 0 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ def init_dict(data, index, columns, dtype=None):
arrays.loc[missing] = [v] * missing.sum()

else:

for key in data:
if (isinstance(data[key], ABCDatetimeIndex) and
data[key].tz is not None):
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
data[key] = data[key].copy(deep=True)

keys = com.dict_keys_to_ordered_list(data)
columns = data_names = Index(keys)
arrays = [data[k] for k in keys]
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
is_extension_array_dtype, is_extension_type, is_hashable, is_integer,
is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDataFrame, ABCSeries, ABCSparseArray, ABCSparseSeries)
ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries)
from pandas.core.dtypes.missing import (
isna, na_value_for_dtype, notna, remove_na_arraylike)

Expand Down Expand Up @@ -182,6 +182,11 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
else:
# need to copy to avoid aliasing issues
data = data._values.copy()
if (isinstance(data, ABCDatetimeIndex) and
data.tz is not None):
# GH#24096 need copy to be deep for datetime64tz case
# TODO: See if we can avoid these copies
data = data._values.copy(deep=True)
copy = False

elif isinstance(data, np.ndarray):
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,22 @@


class TestDataFrameBlockInternals():
def test_setitem_invalidates_datetime_index_freq(self):
# GH#24096 altering a datetime64tz column inplace invalidates the
# `freq` attribute on the underlying DatetimeIndex

dti = date_range('20130101', periods=3, tz='US/Eastern')
ts = dti[1]

df = DataFrame({'B': dti})
assert df['B']._values.freq == 'D'

df.iloc[1, 0] = pd.NaT
assert df['B']._values.freq is None

# check that the DatetimeIndex was not altered in place
assert dti.freq == 'D'
assert dti[1] == ts

def test_cast_internals(self, float_frame):
casted = DataFrame(float_frame._data, dtype=int)
Expand Down
42 changes: 42 additions & 0 deletions pandas/tests/series/test_block_internals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

import pandas as pd

# Segregated collection of methods that require the BlockManager internal data
# structure


class TestSeriesBlockInternals(object):

def test_setitem_invalidates_datetime_index_freq(self):
# GH#24096 altering a datetime64tz Series inplace invalidates the
# `freq` attribute on the underlying DatetimeIndex

dti = pd.date_range('20130101', periods=3, tz='US/Eastern')
ts = dti[1]
ser = pd.Series(dti)
assert ser._values is not dti
assert ser._values._data.base is not dti._data.base
assert dti.freq == 'D'
ser.iloc[1] = pd.NaT
assert ser._values.freq is None

# check that the DatetimeIndex was not altered in place
assert ser._values is not dti
assert ser._values._data.base is not dti._data.base
assert dti[1] == ts
assert dti.freq == 'D'

def test_dt64tz_setitem_does_not_mutate_dti(self):
# GH#21907, GH#24096
dti = pd.date_range('2016-01-01', periods=10, tz='US/Pacific')
ts = dti[0]
ser = pd.Series(dti)
assert ser._values is not dti
assert ser._values._data.base is not dti._data.base
assert ser._data.blocks[0].values is not dti
assert ser._data.blocks[0].values._data.base is not dti._data.base

ser[::3] = pd.NaT
assert ser[0] is pd.NaT
assert dti[0] == ts

0 comments on commit cb862e4

Please sign in to comment.