Skip to content

Commit

Permalink
Merge pull request pandas-dev#7 from dimastbk/issue-50395
Browse files Browse the repository at this point in the history
added xfail to tests, small fixes
  • Loading branch information
kostyafarber authored Apr 8, 2023
2 parents 8803ca9 + 2f5ffba commit b8b1a9a
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 19 deletions.
1 change: 0 additions & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,6 @@ Other enhancements
- :meth:`Series.dropna` and :meth:`DataFrame.dropna` have gained the ``ignore_index`` keyword to reset the index (:issue:`31725`)
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`)
- Added support for :meth:`Index.min` and :meth:`Index.max` for pyarrow string dtypes (:issue:`51397`)
- Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
- Added :meth:`Series.dt.unit` and :meth:`Series.dt.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`51223`)
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ Other enhancements
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
- :meth:`DataFrame.applymap` now uses the :meth:`~api.extensions.ExtensionArray.map` method of underlying :class:`api.extensions.ExtensionArray` instances (:issue:`52219`)
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
- Added ``calamine`` as an engine to ``read_excel`` (:issue:`50395`)
- :meth:`Categorical.map` and :meth:`CategoricalIndex.map` now have a ``na_action`` parameter (:issue:`44279`)
- Added ``calamine`` as an engine to ``read_excel`` (:issue:`50395`)
- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)

.. ---------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1456,7 +1456,7 @@ class ExcelFile:
This is not supported, switch to using ``openpyxl`` instead.
"""

from pandas.io.excel._calaminereader import CalamineExcelReader
from pandas.io.excel._calamine import CalamineReader
from pandas.io.excel._odfreader import ODFReader
from pandas.io.excel._openpyxl import OpenpyxlReader
from pandas.io.excel._pyxlsb import PyxlsbReader
Expand All @@ -1467,7 +1467,7 @@ class ExcelFile:
"openpyxl": OpenpyxlReader,
"odf": ODFReader,
"pyxlsb": PyxlsbReader,
"calamine": CalamineExcelReader,
"calamine": CalamineReader,
}

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
_CellValueT = Union[int, float, str, bool, time, date, datetime]


class CalamineExcelReader(BaseExcelReader):
class CalamineReader(BaseExcelReader):
@doc(storage_options=_shared_docs["storage_options"])
def __init__(
self,
Expand Down
80 changes: 66 additions & 14 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,10 @@ def test_reader_special_dtypes(self, request, engine, read_ext):
reason="Calamine support parsing datetime only in xlsx"
)
)
if engine == "calamine":
request.node.add_marker(
pytest.mark.xfail(reason="Calamine can't parse this datetime format")
)

expected = DataFrame.from_dict(
{
Expand Down Expand Up @@ -584,11 +588,16 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
actual = pd.read_excel(basename + read_ext, dtype=dtype)
tm.assert_frame_equal(actual, expected)

def test_dtype_backend(self, read_ext, dtype_backend):
def test_dtype_backend(self, request, engine, read_ext, dtype_backend):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")

if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(reason="Calamine doesn't support invalid ods")
)

df = DataFrame(
{
"a": Series([1, 3], dtype="Int64"),
Expand Down Expand Up @@ -629,11 +638,16 @@ def test_dtype_backend(self, read_ext, dtype_backend):
expected = df
tm.assert_frame_equal(result, expected)

def test_dtype_backend_and_dtype(self, read_ext):
def test_dtype_backend_and_dtype(self, request, engine, read_ext):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")

if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(reason="Calamine doesn't support invalid ods")
)

df = DataFrame({"a": [np.nan, 1.0], "b": [2.5, np.nan]})
with tm.ensure_clean(read_ext) as file_path:
df.to_excel(file_path, "test", index=False)
Expand All @@ -646,11 +660,16 @@ def test_dtype_backend_and_dtype(self, read_ext):
tm.assert_frame_equal(result, df)

@td.skip_if_no("pyarrow")
def test_dtype_backend_string(self, read_ext, string_storage):
def test_dtype_backend_string(self, request, engine, read_ext, string_storage):
# GH#36712
if read_ext in (".xlsb", ".xls"):
pytest.skip(f"No engine for filetype: '{read_ext}'")

if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(reason="Calamine doesn't support invalid ods")
)

import pyarrow as pa

with pd.option_context("mode.string_storage", string_storage):
Expand Down Expand Up @@ -694,8 +713,15 @@ def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
assert dtype_dict == dtype_dict_copy, "dtype dict changed"
tm.assert_frame_equal(result, expected)

def test_reader_spaces(self, read_ext):
def test_reader_spaces(self, request, engine, read_ext):
# see gh-32207

# https://github.com/tafia/calamine/pull/289
if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(reason="Calamine doesn't respect spaces in ods")
)

basename = "test_spaces"

actual = pd.read_excel(basename + read_ext)
Expand Down Expand Up @@ -790,12 +816,6 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
reason="Sheets containing datetimes not supported by pyxlsb"
)
)
if engine == "calamine" and read_ext in {".xls", ".xlsb", ".ods"}:
request.node.add_marker(
pytest.mark.xfail(
reason="Calamine support parsing datetime only in xlsx"
)
)

expected = DataFrame(
[
Expand All @@ -806,6 +826,11 @@ def test_date_conversion_overflow(self, request, engine, read_ext):
columns=["DateColWithBigInt", "StringCol"],
)

if engine == "calamine":
request.node.add_marker(
pytest.mark.xfail(reason="Maybe not supported by calamine")
)

if engine == "openpyxl":
request.node.add_marker(
pytest.mark.xfail(reason="Maybe not supported by openpyxl")
Expand Down Expand Up @@ -1008,6 +1033,12 @@ def test_reader_seconds(self, request, engine, read_ext):
reason="Calamine support parsing datetime only in xlsx"
)
)
if engine == "calamine":
request.node.add_marker(
pytest.mark.xfail(
reason="Calamine doesn't support parsing milliseconds in datetime"
)
)

# Test reading times with and without milliseconds. GH5945.
expected = DataFrame.from_dict(
Expand Down Expand Up @@ -1174,10 +1205,17 @@ def test_read_excel_multiindex_blank_after_name(
)
tm.assert_frame_equal(result, expected)

def test_read_excel_multiindex_header_only(self, read_ext):
def test_read_excel_multiindex_header_only(self, request, engine, read_ext):
# see gh-11733.
#
# Don't try to parse a header name if there isn't one.
if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(
reason="Calamine doesn't support 'number-rows-repeated' in ods"
)
)

mi_file = "testmultiindex" + read_ext
result = pd.read_excel(mi_file, sheet_name="index_col_none", header=[0, 1])

Expand Down Expand Up @@ -1418,8 +1456,15 @@ def test_deprecated_kwargs(self, read_ext):
with pytest.raises(TypeError, match="but 3 positional arguments"):
pd.read_excel("test1" + read_ext, "Sheet1", 0)

def test_no_header_with_list_index_col(self, read_ext):
def test_no_header_with_list_index_col(self, request, engine, read_ext):
# GH 31783
if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(
reason="Calamine doesn't support 'number-rows-repeated' in ods"
)
)

file_name = "testmultiindex" + read_ext
data = [("B", "B"), ("key", "val"), (3, 4), (3, 4)]
idx = MultiIndex.from_tuples(
Expand All @@ -1439,8 +1484,15 @@ def test_one_col_noskip_blank_line(self, read_ext):
result = pd.read_excel(file_name)
tm.assert_frame_equal(result, expected)

def test_multiheader_two_blank_lines(self, read_ext):
def test_multiheader_two_blank_lines(self, request, engine, read_ext):
# GH 40442
if engine == "calamine" and read_ext == ".ods":
request.node.add_marker(
pytest.mark.xfail(
reason="Calamine doesn't support 'number-rows-repeated' in ods"
)
)

file_name = "testmultiindex" + read_ext
columns = MultiIndex.from_tuples([("a", "A"), ("b", "B")])
data = [[np.nan, np.nan], [np.nan, np.nan], [1, 3], [2, 4]]
Expand Down Expand Up @@ -1703,7 +1755,7 @@ def test_excel_read_binary(self, engine, read_ext):
def test_excel_read_binary_via_read_excel(self, read_ext, engine):
# GH 38424
with open("test1" + read_ext, "rb") as f:
result = pd.read_excel(f)
result = pd.read_excel(f, engine=engine)
expected = pd.read_excel("test1" + read_ext, engine=engine)
tm.assert_frame_equal(result, expected)

Expand Down

0 comments on commit b8b1a9a

Please sign in to comment.