# Month abbreviations as written in Eclipse decks, mapped to calendar month
# numbers. July has two accepted spellings, "JUL" and "JLY".
ECLMONTH2NUM = dict(
    JAN=1,
    FEB=2,
    MAR=3,
    APR=4,
    MAY=5,
    JUN=6,
    JUL=7,
    JLY=7,
    AUG=8,
    SEP=9,
    OCT=10,
    NOV=11,
    DEC=12,
)
# Reverse lookup. Where two abbreviations share a month number, the one
# listed last in ECLMONTH2NUM wins, so month 7 is written as "JLY".
NUM2ECLMONTH = dict(zip(ECLMONTH2NUM.values(), ECLMONTH2NUM.keys()))


def parse_ecl_month(eclmonth: str) -> int:
    """Translate an Eclipse month string into its integer month.

    Args:
        eclmonth: Month abbreviation, exactly as keyed in ECLMONTH2NUM
            (upper case, e.g. "JAN" or "JLY").

    Returns:
        Month number in the range 1-12.

    Raises:
        KeyError: If the abbreviation is not a key in ECLMONTH2NUM.
    """
    return ECLMONTH2NUM[eclmonth]


def datetime_to_eclipsedate(
    timestamp: Union[str, datetime.datetime, datetime.date]
) -> str:
    """Format a date or timestamp the way the Eclipse DATES keyword wants it.

    Args:
        timestamp: A date, a datetime, or a string whose date part is written
            as YYYY-MM-DD, optionally followed by a space and a time of day.

    Returns:
        A string like ``1 'FEB' 2021``. For datetimes, ``HH:MM:SS`` is
        appended unless the time of day is exactly 00:00:00. Sub-second
        precision is not included.

    Raises:
        ValueError: If a string is given whose date part is not shaped
            like YYYY-MM-DD.
        TypeError: If the input is neither a string, a date nor a datetime.
    """
    if isinstance(timestamp, str):
        # Only the shape of the date part is verified here (4-2-2 characters
        # between dashes); dateutil.parser.isoparse() is not in Python 3.6.
        date_part = timestamp.split(" ")[0]
        if [len(field) for field in date_part.split("-")] != [4, 2, 2]:
            raise ValueError("Use ISO-format for dates")
        timestamp = dateutil.parser.parse(timestamp)  # noqa (py36 flake8 bug)
    if not isinstance(timestamp, (datetime.datetime, datetime.date)):
        raise TypeError("Require string or datetime")

    month = NUM2ECLMONTH[timestamp.month]
    pieces = [f"{timestamp.day} '{month}' {timestamp.year}"]
    if isinstance(timestamp, datetime.datetime):
        pieces.append(timestamp.strftime("%H:%M:%S"))
    # A midnight clock time carries no information, leave it out:
    return " ".join(pieces).replace("00:00:00", "").strip()
def generic_ecltable(
    dframe: pd.DataFrame,
    keyword: str,
    comment: Optional[str] = None,
    renamer: Optional[Dict[str, str]] = None,
    drop_trailing_columns: bool = True,
) -> str:
    """Construct a typical Eclipse table for data following a keyword.

    Each row (record in Eclipse terms) ends with a slash.

    This function will *not* add a final slash after all rows, as
    this is keyword dependent. Some keywords require it, some keywords
    require it to not be there.

    The header is printed as a comment, with header names taken
    from the dataframe.

    Trailing columns that are all defaulted (that is either np.nan, None)
    or consisting of only "1*" will be dropped when requested, as Eclipse
    will always interpret missing trailing items as "1*".

    Args:
        dframe: Data to print, one row pr. record. Columns that are not
            items of the keyword are ignored. The dataframe is not modified.
        keyword: Eclipse keyword name, used as the first line of the output
            and to look up item names and value types in OPMKEYWORDS.
        comment: Optional, possibly multiline, text printed as Eclipse
            comments directly below the keyword line.
        renamer: Maps from opm.common item names into the column names used
            in the dataframe. Columns are translated to opm.common names
            while the table is built, and translated back for the header
            comment. If the dataframe already uses opm.common names, the
            renamer only affects the header comment.
        drop_trailing_columns: If True, remove trailing columns where every
            row is defaulted.

    Returns:
        The keyword line, the optional comment lines and, if there is any
        relevant data, a commented header line followed by one line pr. row,
        terminated by a newline.

    Raises:
        KeyError: If the dataframe is non-empty and the keyword is not
            present in OPMKEYWORDS (presumes OPMKEYWORDS is a plain dict).
    """
    # Start building the string we are to return:
    string = keyword + "\n"
    if comment:
        string += "\n".join("-- " + line for line in comment.splitlines()) + "\n"

    # Empty tables are ok with Eclipse (at least sometimes). Note that this
    # returns before the keyword is looked up in OPMKEYWORDS.
    if dframe.empty:
        return string

    # Ensure we work on a copy as we are going to modify it in order to have
    # Pandas make a pretty txt table:
    dframe = dframe.copy()

    # Column names are pr. ecl2df standard, redo to opm.common in order to use
    # sorting from that:
    if renamer is not None:
        inv_renamer = {value: key for key, value in renamer.items()}
        dframe.rename(inv_renamer, axis="columns", inplace=True)

    keyword_items = OPMKEYWORDS[keyword]["items"]
    keyword_col_headers = [item["name"] for item in keyword_items]

    # Position (in keyword item order) of the last item we have data for:
    rightmost_column = max(
        (
            keyword_col_headers.index(item)
            for item in set(dframe.columns).intersection(keyword_col_headers)
        ),
        default=-1,
    )
    if rightmost_column == -1:
        # No relevant data in the dataframe
        return string
    relevant_columns = keyword_col_headers[: rightmost_column + 1]
    for colname in relevant_columns:
        # Add those that are missing, as Eclipse defaults
        if colname not in dframe:
            dframe[colname] = "1*"

    # Reorder and slice columns. NaN or Nones are assumed to be defaulted,
    # which in Eclipse terminology is the string "1*". Filling on the fresh
    # frame (not in place on a slice) avoids chained-assignment trouble.
    dframe = dframe[relevant_columns].fillna(value="1*")

    if drop_trailing_columns:
        for col_name in reversed(relevant_columns):
            if set(dframe[col_name].to_numpy()) == {"1*"}:
                del dframe[col_name]
            else:
                break

    # It is critical for opm.common, maybe also E100 to have integers printed
    # as integers, for correct parsing. Ensure these are integer where the json
    # says integer before we convert them to strings:
    integer_cols = {
        item["name"] for item in keyword_items if item["value_type"] == "INT"
    }
    for int_col in integer_cols.intersection(dframe.columns):
        # Work on an object-typed copy of the column, so that writing strings
        # into a numeric column never depends on implicit dtype upcasting.
        column = dframe[int_col].astype(object)
        non_defaulted_rows = column != "1*"
        column[non_defaulted_rows] = (
            column[non_defaulted_rows].astype(int).astype(str)
        )
        dframe[int_col] = column

    # Quote all string data. This is not always needed, but needed
    # for some columns, for example well-names containing a slash.
    string_cols = {
        item["name"] for item in keyword_items if item["value_type"] == "STRING"
    }
    for str_col in string_cols.intersection(dframe.columns):
        # Ensure 1* is not quoted.
        non_defaulted_rows = dframe[str_col] != "1*"
        # Strip quotes already present in the data, so that values are
        # never double-quoted. The result of str.replace() must be kept,
        # it does not modify the dataframe in place.
        unquoted = dframe.loc[non_defaulted_rows, str_col].str.replace(
            "'", "", regex=False
        )
        dframe.loc[non_defaulted_rows, str_col] = "'" + unquoted + "'"

    # Now rename again to have prettier column names:
    if renamer is not None:
        dframe.rename(renamer, axis="columns", inplace=True)
    # Add a final column with the end-slash, invisible header:
    dframe[" "] = "/"
    tablestring = dframe.to_string(header=True, index=False)
    # Indent all lines with two spaces:
    tablestring = "\n".join(
        "  " + line.strip().replace("  /", " /")
        for line in tablestring.splitlines()
        # The replace() in there is needed for py36/pandas==1.1.5 only.
    )
    # Eclipse comment for the header line, overwriting the indentation:
    tablestring = "--" + tablestring[1:]
    return string + tablestring + "\n"
def _normalize_whitespace(text):
    """Collapse every run of whitespace (including newlines) to one space"""
    return " ".join(text.split())


@pytest.mark.parametrize(
    "somedate, expected",
    [
        ("2021-02-01", "1 'FEB' 2021"),
        ("2021-02-01 010203", "1 'FEB' 2021 01:02:03"),
        ("2021-02-01 01:02:03", "1 'FEB' 2021 01:02:03"),
        (datetime.date(2021, 2, 1), "1 'FEB' 2021"),
        (datetime.datetime(2021, 2, 1, 0, 0, 0), "1 'FEB' 2021"),
        ("2021-02-01 000000", "1 'FEB' 2021"),
        (datetime.datetime(2021, 2, 1, 2, 3, 4), "1 'FEB' 2021 02:03:04"),
        (datetime.datetime(2021, 2, 1, 2, 3, 4, 4433), "1 'FEB' 2021 02:03:04"),
    ],
)
def test_datetime_to_eclipsedate(somedate, expected):
    """Test conversion of datetime to Eclipse date or datetime syntax"""
    assert common.datetime_to_eclipsedate(somedate) == expected


@pytest.mark.parametrize(
    "somedate, exception, message",
    [
        (None, TypeError, "Require string or datetime"),
        ({}, TypeError, "Require string or datetime"),
        ("", ValueError, "Use ISO"),
        ("01/02/2021", ValueError, "Use ISO"),
    ],
)
def test_datetime_to_eclipsedate_errors(somedate, exception, message):
    """Test that unsupported input is rejected with the documented error.

    pytest.raises() is used instead of xfail marks, as xfail has no "match"
    argument and would therefore never verify the error message.
    """
    with pytest.raises(exception, match=message):
        common.datetime_to_eclipsedate(somedate)


@pytest.mark.parametrize(
    "dframe, keyword, comment, renamer, drop_trailing_columns, expected",
    [
        pytest.param(
            pd.DataFrame(),
            "FOO",
            None,
            None,
            False,
            "FOO\n",
            # An empty frame returns before the keyword is looked up, so
            # an unknown keyword does not raise here.
            id="unknown-keyword-empty-frame",
        ),
        pytest.param(
            pd.DataFrame(),
            "COMPDAT",
            None,
            None,
            False,
            "COMPDAT\n",
            id="empty-frame",
        ),
        pytest.param(
            pd.DataFrame(),
            "COMPDAT",
            "foobar",
            None,
            False,
            "COMPDAT\n-- foobar\n",
            id="comment",
        ),
        pytest.param(
            pd.DataFrame(),
            "COMPDAT",
            "",
            None,
            False,
            "COMPDAT\n",
            id="comment-empty-string",
        ),
        pytest.param(
            pd.DataFrame(),
            "COMPDAT",
            "foo\nbar",
            None,
            False,
            "COMPDAT\n-- foo\n-- bar\n",
            id="comment-multiline",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1"}]),
            "COMPDAT",
            None,
            None,
            True,
            "COMPDAT\n-- WELL\n 'OP1' /\n",
            id="OP1",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1"}, {"WELL": "OP2"}]),
            "COMPDAT",
            None,
            None,
            True,
            "COMPDAT\n-- WELL\n 'OP1' /\n 'OP2' /\n",
            id="two-rows",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1", "DIR": np.nan}]),
            "COMPDAT",
            None,
            None,
            True,
            "COMPDAT\n-- WELL\n 'OP1' /\n",
            id="nan-column1",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1", "I": None}]),
            "COMPDAT",
            None,
            None,
            True,
            "COMPDAT\n-- WELL\n 'OP1' /\n",
            id="nan-column2",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1", "I": None}]),
            "COMPDAT",
            None,
            None,
            False,
            "COMPDAT\n-- WELL I\n 'OP1' 1* /\n",
            id="nan-column2-no-drop",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1", "J": "2"}]),
            "COMPDAT",
            None,
            None,
            True,
            "COMPDAT\n-- WELL I J\n 'OP1' 1* 2 /\n",
            # Here, the I column should not be dropped but defaulted
            id="nan-column3",
        ),
        pytest.param(
            pd.DataFrame([{"FOOWELL": "OP1"}]),
            "COMPDAT",
            None,
            {"WELL": "FOOWELL"},
            True,
            "COMPDAT\n-- FOOWELL\n 'OP1' /\n",
            id="renamer-strange-input-column-names",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1"}]),
            "COMPDAT",
            None,
            {"WELL": "FOO"},
            True,
            "COMPDAT\n-- FOO\n 'OP1' /\n",
            id="renamer-only-for-header-line",
        ),
        pytest.param(
            pd.DataFrame([{"WELL": "OP1"}]),
            "COMPDAT",
            None,
            {"bogus": "morebogus"},
            True,
            "COMPDAT\n-- WELL\n 'OP1' /\n",
            id="irrelevant-renamer",
        ),
        pytest.param(
            pd.DataFrame([{"BOGUS": "OP1"}]),
            "COMPDAT",
            None,
            None,
            True,
            "COMPDAT\n",
            id="bogus-column",
        ),
    ],
)
def test_generic_ecltable(
    dframe, keyword, comment, renamer, drop_trailing_columns, expected
):
    """Test the content of tables made by generic_ecltable().

    The amount of padding Pandas puts between columns differs between
    Pandas versions, so the comparison is done on whitespace-normalized
    strings. This verifies tokens and their order, not the exact layout.
    """
    stringtable = common.generic_ecltable(
        dframe,
        keyword,
        comment=comment,
        renamer=renamer,
        drop_trailing_columns=drop_trailing_columns,
    )
    assert _normalize_whitespace(stringtable) == _normalize_whitespace(expected)


def test_generic_ecltable_unknown_keyword():
    """Test that an unknown keyword is an error when there is data to print.

    NOTE(review): presumes common.OPMKEYWORDS raises KeyError for missing
    keywords, like a plain dict - confirm.
    """
    with pytest.raises(KeyError, match="FOO"):
        common.generic_ecltable(pd.DataFrame([{"WELL": "OP1"}]), "FOO")