From ad91e061cdd621ffe3e06b3270e5051b0c740539 Mon Sep 17 00:00:00 2001 From: Asgeir Nyvoll Date: Fri, 19 Aug 2022 17:37:05 +0200 Subject: [PATCH 1/2] Stricten parsing of malformed parameter csv's with pandas>=1.4 --- ecl2df/parameters.py | 18 +++++++++++++++--- setup.py | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ecl2df/parameters.py b/ecl2df/parameters.py index 3db8c7113..ecace08c8 100644 --- a/ecl2df/parameters.py +++ b/ecl2df/parameters.py @@ -3,6 +3,7 @@ import json import logging +import warnings from pathlib import Path from typing import Any, Dict, List, Union @@ -65,9 +66,20 @@ def load_parameterstxt(filename: Union[str, Path]) -> Dict[str, Any]: filename: file containing one key-value pair pr. line, separated by whitespace """ - dframe = pd.read_csv( - filename, comment="#", sep=r"\s", engine="python", names=["KEY", "VALUE"] - ) + with warnings.catch_warnings(record=True): + warnings.filterwarnings("error") + try: + dframe = pd.read_csv( + filename, + comment="#", + sep=r"\s", + engine="python", + names=["KEY", "VALUE"], + index_col=False, + ) + except pd.errors.ParserWarning as txt_exc: + raise pd.errors.ParserError(txt_exc) + return dframe.set_index("KEY")["VALUE"].to_dict() diff --git a/setup.py b/setup.py index 7222da8d0..12ce4b765 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ "ecl", "numpy", "opm>=2020.10.2", # NB: Pypi versions. - "pandas<1.4.0", + "pandas", "pyarrow", "pyyaml>=5.1", "treelib", From 5a599249c2c2b606c55963e553dbd33ed6be5920 Mon Sep 17 00:00:00 2001 From: Asgeir Nyvoll Date: Mon, 22 Aug 2022 08:51:07 +0200 Subject: [PATCH 2/2] Add comment on ParserWarning raised as ParserError --- ecl2df/parameters.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ecl2df/parameters.py b/ecl2df/parameters.py index ecace08c8..b97af9c57 100644 --- a/ecl2df/parameters.py +++ b/ecl2df/parameters.py @@ -67,6 +67,9 @@ def load_parameterstxt(filename: Union[str, Path]) -> Dict[str, Any]: separated by whitespace """ with warnings.catch_warnings(record=True): + # From pandas 1.4, too many columns result in a ParserWarning for dropped + # data. This is risky, and therefore catching the warning and raising a + # ParserError instead. warnings.filterwarnings("error") try: dframe = pd.read_csv(