diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index aed00ca578984..424b496c93f31 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -283,6 +283,7 @@ I/O - Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) - Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696, :issue:`16798`). - Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). +- Bug in :func:`read_csv` in which passing a single-element list as ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 543a943aea311..928b2cdd57b2c 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -535,23 +535,26 @@ cdef class TextReader: self.parser_start = 0 self.header = [] else: - if isinstance(header, list) and len(header): - # need to artifically skip the final line - # which is still a header line - header = list(header) - header.append(header[-1] + 1) + if isinstance(header, list): + if len(header) > 1: + # need to artificially skip the final line + # which is still a header line + header = list(header) + header.append(header[-1] + 1) + self.parser.header_end = header[-1] + self.has_mi_columns = 1 + else: + self.parser.header_end = header[0] + self.parser_start = header[-1] + 1 self.parser.header_start = header[0] - self.parser.header_end = header[-1] self.parser.header = header[0] - self.parser_start = header[-1] + 1 - 
self.has_mi_columns = 1 self.header = header else: self.parser.header_start = header self.parser.header_end = header - self.parser.header = header self.parser_start = header + 1 + self.parser.header = header self.header = [ header ] self.names = names diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ea0bb104338b6..3a78866b7b53f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2279,10 +2279,11 @@ def _infer_columns(self): if self.header is not None: header = self.header - # we have a mi columns, so read an extra line if isinstance(header, (list, tuple, np.ndarray)): - have_mi_columns = True - header = list(header) + [header[-1] + 1] + have_mi_columns = len(header) > 1 + # we have mi columns (more than one header row), so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] else: have_mi_columns = False header = [header] diff --git a/pandas/tests/io/parser/header.py b/pandas/tests/io/parser/header.py index 4935fd2cd910a..50ae4dae541ac 100644 --- a/pandas/tests/io/parser/header.py +++ b/pandas/tests/io/parser/header.py @@ -286,3 +286,10 @@ def test_non_int_header(self): self.read_csv(StringIO(data), sep=',', header=['a', 'b']) with tm.assert_raises_regex(ValueError, msg): self.read_csv(StringIO(data), sep=',', header='string_header') + + def test_singleton_header(self): + # See GH #7757: a single-element list header must not yield an all-NaN frame + data = """a,b,c\n0,1,2\n1,2,3""" + df = self.read_csv(StringIO(data), header=[0]) + expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]}) + tm.assert_frame_equal(df, expected)