diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index cd56f590a33bf..566e3d7c66b79 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -115,7 +115,7 @@ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for mor Other API changes ^^^^^^^^^^^^^^^^^ -- +- :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser. - .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 6e4ea85548230..dc104b3020f14 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -33,6 +33,7 @@ from pandas.io.parsers.base_parser import ( ParserBase, + ParserError, is_index_col, ) @@ -270,6 +271,13 @@ def read( # implicit index, no index names arrays = [] + if self.index_col and self._reader.leading_cols != len(self.index_col): + raise ParserError( + "Could not construct index. Requested to use " + f"{len(self.index_col)} number of columns, but " + f"{self._reader.leading_cols} left to parse." + ) + for i in range(self._reader.leading_cols): if self.index_col is None: values = data.pop(i) diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index f52af109626e9..fc30ebff0d93a 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -292,6 +292,18 @@ def test_conflict_on_bad_line(all_parsers, error_bad_lines, warn_bad_lines): parser.read_csv(StringIO(data), on_bad_lines="error", **kwds) +def test_bad_header_uniform_error(all_parsers): + parser = all_parsers + data = "+++123456789...\ncol1,col2,col3,col4\n1,2,3,4\n" + msg = "Expected 2 fields in line 2, saw 4" + if parser.engine == "c": + msg = "Could not construct index. 
Requested to use 1 " + "number of columns, but 3 left to parse." + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), index_col=0, on_bad_lines="error") + + def test_on_bad_lines_warn_correct_formatting(all_parsers, capsys): # see gh-15925 parser = all_parsers