diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 46abc16f1b96af..d836ef3441e89a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1264,9 +1264,6 @@ MultiIndex I/O ^^^ -- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) -- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) - .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine @@ -1302,6 +1299,9 @@ Current Behavior: Notice how we now instead output ``np.nan`` itself instead of a stringified form of it. +- Bug in :meth:`to_sql` when writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`to_sql` where a naive DatetimeIndex would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) +- Bug in :meth:`read_excel()` when ``parse_cols`` is specified with an empty dataset (:issue:`9208`) - :func:`read_html()` no longer ignores all-whitespace ```` within ```` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`) - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` and func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index c25a7670cce442..a7e0e48de0a754 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -634,14 +634,17 @@ def _parse_cell(cell_contents, cell_typ): else: offset = 1 + max(header) - for col in index_col: - last = data[offset][col] - - for row in range(offset + 1, len(data)): - if data[row][col] == '' or data[row][col] is None: - data[row][col] = last - else: - last = data[row][col] + # Check if we have an empty dataset + # before trying to collect data. + if offset < len(data): + for col in index_col: + last = data[offset][col] + + for row in range(offset + 1, len(data)): + if data[row][col] == '' or data[row][col] is None: + data[row][col] = last + else: + last = data[row][col] has_index_names = is_list_like(header) and len(header) > 1 diff --git a/pandas/tests/io/data/test1.xls b/pandas/tests/io/data/test1.xls index db0f9dec7d5e42..a5940b2cfa6c24 100644 Binary files a/pandas/tests/io/data/test1.xls and b/pandas/tests/io/data/test1.xls differ diff --git a/pandas/tests/io/data/test1.xlsm b/pandas/tests/io/data/test1.xlsm index 4c873e55a53007..981c303b7bd30c 100644 Binary files a/pandas/tests/io/data/test1.xlsm and b/pandas/tests/io/data/test1.xlsm differ diff --git a/pandas/tests/io/data/test1.xlsx b/pandas/tests/io/data/test1.xlsx index e6d3a0d503cf25..8f011d06875215 100644 Binary files a/pandas/tests/io/data/test1.xlsx and b/pandas/tests/io/data/test1.xlsx differ diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 9b147d53c06c4b..c79affaff3c1f8 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -235,6 +235,16 @@ def test_index_col_label_error(self, ext): self.get_exceldf("test1", ext, "Sheet1", index_col=["A"], usecols=["A", "C"]) + def test_index_col_empty(self, ext): + # see gh-9208 + result = self.get_exceldf("test1", ext, "Sheet3", + index_col=["A", "B", "C"]) + expected = DataFrame(columns=["D", "E", "F"], + index=MultiIndex(levels=[[]] * 3, + labels=[[]] * 3, + names=["A", "B", "C"])) + tm.assert_frame_equal(result, expected) + def test_usecols_pass_non_existent_column(self, ext): msg = ("Usecols do not match columns, " "columns expected but not found: " + r"\['E'\]")