Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix to_json when converting Period column #32665

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,4 @@ doc/build/html/index.html
doc/tmp.sv
env/
doc/source/savefig/
venv/
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,7 @@ I/O
- Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`)
- Bug in :meth:`read_json` was raising ``TypeError`` when reading a list of booleans into a Series. (:issue:`31464`)
- Bug in :func:`pandas.io.json.json_normalize` where location specified by `record_path` doesn't point to an array. (:issue:`26284`)
- Bug in :meth:`to_json` was raising ``AttributeError`` with a column or Series of ``PeriodDtype``. (:issue:`31917`)
- :func:`pandas.read_hdf` has a more explicit error message when loading an
unsupported HDF file (:issue:`9539`)
- Bug in :meth:`~DataFrame.read_feather` was raising an `ArrowIOError` when reading an s3 or http file path (:issue:`29055`)
Expand Down
38 changes: 38 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,15 @@ def _create_series(index):
}


@pytest.fixture
def period_series():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd did we settle on a naming scheme for these?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems reasonable; we already have a datetime_series so this follows that pattern

"""Fixture for Series with Period-type index.
"""
s = tm.makePeriodSeries()
s.name = "ps"
return s


@pytest.fixture
def series_with_simple_index(indices):
"""
Expand Down Expand Up @@ -588,6 +597,35 @@ def datetime_frame():
return DataFrame(tm.getTimeSeriesData())


@pytest.fixture
def period_frame():
    """
    Fixture providing a DataFrame of floats indexed by a PeriodIndex.

    The frame is built from ``tm.getPeriodData()`` and has the columns
    ['A', 'B', 'C', 'D'].

    Sample of the layout:

                       A         B         C         D
    2000-01-03 -1.122153  0.468535  0.122226  1.693711
    2000-01-04  0.189378  0.486100  0.007864 -1.216052
    2000-01-05  0.041401 -0.835752 -0.035279 -0.414357
    2000-01-06  0.430050  0.894352  0.090719  0.036939
    2000-01-07 -0.620982 -0.668211 -0.706153  1.466335
    2000-01-10 -0.752633  0.328434 -0.815325  0.699674
    2000-01-11 -2.236969  0.615737 -0.829076 -1.196106
    ...              ...       ...       ...       ...
    2000-02-03  1.642618 -0.579288  0.046005  1.385249
    2000-02-04 -0.544873 -1.160962 -0.284071 -1.418351
    2000-02-07 -2.656149 -0.601387  1.410148  0.444150
    2000-02-08 -1.201881 -1.289040  0.772992 -1.445300
    2000-02-09  1.377373  0.398619  1.008453 -0.928207
    2000-02-10  0.473194 -0.636677  0.984058  0.511519
    2000-02-11 -0.965556  0.408313 -1.312844 -0.381948

    [30 rows x 4 columns]
    """
    period_data = tm.getPeriodData()
    return DataFrame(period_data)


@pytest.fixture
def float_frame():
"""
Expand Down
78 changes: 54 additions & 24 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,6 @@ def setup(self):

yield

@pytest.fixture
def datetime_series(self):
    """
    Variant of the usual ``datetime_series`` fixture, named "ts".

    The index freq is set to None because freq does not round-trip,
    see GH#33711.
    """
    series = tm.makeTimeSeries()
    # Drop the freq first; the name assignment is independent of the index.
    series.index = series.index._with_freq(None)
    series.name = "ts"
    return series

@pytest.fixture
def datetime_frame(self):
    """
    Variant of the usual ``datetime_frame`` fixture.

    The index freq is set to None because freq does not round-trip,
    see GH#33711.
    """
    frame = DataFrame(tm.getTimeSeriesData())
    freqless_index = frame.index._with_freq(None)
    frame.index = freqless_index
    return frame

def test_frame_double_encoded_labels(self, orient):
df = DataFrame(
[["a", "b"], ["c", "d"]],
Expand Down Expand Up @@ -382,6 +365,42 @@ def test_frame_to_json_except(self):
with pytest.raises(ValueError, match=msg):
df.to_json(orient="garbage")

def test_frame_roundtrip_period_index(self, orient, period_frame):
    """
    Round-trip a frame with a PeriodIndex through to_json / read_json.

    Parameters
    ----------
    orient : str
        JSON orientation, supplied by the ``orient`` fixture.
    period_frame : DataFrame
        Frame supplied by the ``period_frame`` fixture.
    """
    # GH32665: Fix to_json when converting Period column/series
    if orient == "split":
        pytest.skip("skipping orient=split due to different conversion schema")

    data = period_frame.to_json(orient=orient)
    result = pd.read_json(data, typ="frame", orient=orient)

    expected = period_frame.copy()
    if orient in ("values", "records"):
        # compare against a default integer index for these orients
        expected = expected.reset_index(drop=True)
    if orient == "values":
        # drop column names as well
        expected = expected.T.reset_index(drop=True).T
    if orient in ("index", "columns"):
        # bring the parsed index back to periods with the original freq
        result.index = result.index.to_period(freq=expected.index.freq)

    tm.assert_frame_equal(result, expected)

@pytest.mark.skip(reason="Conversion of Period-like column in dict-like format")
def test_frame_roundtrip_period_columns(self, orient, period_frame):
    """
    Round-trip a frame holding a Period column through to_json / read_json.

    Parameters
    ----------
    orient : str
        JSON orientation, supplied by the ``orient`` fixture.
    period_frame : DataFrame
        Frame supplied by the ``period_frame`` fixture; its index is moved
        into a regular column before serializing.
    """
    # GH32665: Fix to_json when converting Period column/series

    test_frame = period_frame.reset_index()
    data = test_frame.to_json(orient=orient)
    result = pd.read_json(data, typ="frame", orient=orient)

    expected = test_frame
    if orient == "values":
        # replace the column labels with positional ones
        expected.columns = range(len(expected.columns))

    tm.assert_frame_equal(result, expected)

def test_frame_empty(self):
df = DataFrame(columns=["jim", "joe"])
assert not df._is_mixed_type
Expand Down Expand Up @@ -433,9 +452,6 @@ def test_frame_mixedtype_orient(self): # GH10289
tm.assert_frame_equal(left, right)

def test_v12_compat(self, datapath):
dti = pd.date_range("2000-01-03", "2000-01-07")
# freq doesnt roundtrip
dti = pd.DatetimeIndex(np.asarray(dti), freq=None)
df = DataFrame(
[
[1.56808523, 0.65727391, 1.81021139, -0.17251653],
Expand All @@ -445,7 +461,7 @@ def test_v12_compat(self, datapath):
[0.05951614, -2.69652057, 1.28163262, 0.34703478],
],
columns=["A", "B", "C", "D"],
index=dti,
index=pd.date_range("2000-01-03", "2000-01-07"),
)
df["date"] = pd.Timestamp("19920106 18:21:32.12")
df.iloc[3, df.columns.get_loc("date")] = pd.Timestamp("20130101")
Expand All @@ -464,9 +480,6 @@ def test_v12_compat(self, datapath):

def test_blocks_compat_GH9037(self):
index = pd.date_range("20000101", periods=10, freq="H")
# freq doesnt round-trip
index = pd.DatetimeIndex(list(index), freq=None)

df_mixed = DataFrame(
OrderedDict(
float_1=[
Expand Down Expand Up @@ -673,6 +686,23 @@ def test_series_roundtrip_timeseries(self, orient, numpy, datetime_series):

tm.assert_series_equal(result, expected)

def test_series_roundtrip_periodseries(self, orient, period_series):
# GH32665: Fix to_json when converting Period column/series
if orient == "split":
pytest.skip("skipping orient=split due to different conversion schema")

data = period_series.to_json(orient=orient)
result = pd.read_json(data, typ="series", orient=orient)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This issue affects reading into a DataFrame as well, right? If so, can you test that?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I can do that.


expected = period_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
if orient in ("index", "columns"):
result.index = result.index.to_period(freq=expected.index.freq)
expected.name = None

tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("dtype", [np.float64, int])
@pytest.mark.parametrize("numpy", [True, False])
def test_series_roundtrip_numeric(self, orient, numpy, dtype):
Expand Down