Refactor construction of Eclipse input tables (#381)

* Refactor construction of Eclipse input tables * Fixup! Remove unreachable code
equinor · Apr 26, 2023 · c05263b · c05263b
1 parent 1535f9b
commit c05263b
Show file tree

Hide file tree

Showing 3 changed files with 367 additions and 52 deletions.
diff --git a/ecl2df/common.py b/ecl2df/common.py
@@ -13,6 +13,7 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Union
 
+import dateutil.parser
 import numpy as np
 import pandas as pd
 import pyarrow
@@ -93,6 +94,22 @@
         .splitlines()
     )
 ]
+# Eclipse month mnemonics mapped to calendar month numbers. Both "JUL" and
+# "JLY" map to 7, so either spelling is understood when parsing.
+ECLMONTH2NUM = {
+    "JAN": 1,
+    "FEB": 2,
+    "MAR": 3,
+    "APR": 4,
+    "MAY": 5,
+    "JUN": 6,
+    "JUL": 7,
+    "JLY": 7,
+    "AUG": 8,
+    "SEP": 9,
+    "OCT": 10,
+    "NOV": 11,
+    "DEC": 12,
+}
+# Reverse lookup used when writing dates. The "JLY" alias is left out so that
+# month 7 maps to "JUL"; a plain inversion would let the alias, which comes
+# later in the dict, overwrite it.
+NUM2ECLMONTH = {
+    num: month for month, num in ECLMONTH2NUM.items() if month != "JLY"
+}
 
 logger: logging.Logger = logging.getLogger(__name__)
 
@@ -150,22 +167,24 @@ def write_inc_stdout_file(string: str, outputfilename: str) -> None:
 
 def parse_ecl_month(eclmonth: str) -> int:
     """Translate Eclipse month strings to integer months"""
-    eclmonth2num = {
-        "JAN": 1,
-        "FEB": 2,
-        "MAR": 3,
-        "APR": 4,
-        "MAY": 5,
-        "JUN": 6,
-        "JUL": 7,
-        "JLY": 7,
-        "AUG": 8,
-        "SEP": 9,
-        "OCT": 10,
-        "NOV": 11,
-        "DEC": 12,
-    }
-    return eclmonth2num[eclmonth]
+    # Direct lookup in the module-level table; a string that is not one of
+    # its keys raises KeyError.
+    return ECLMONTH2NUM[eclmonth]
+
+
+def datetime_to_eclipsedate(
+    timestamp: Union[str, datetime.datetime, datetime.date]
+) -> str:
+    """Convert a Python timestamp or date to the Eclipse DATE format.
+
+    Args:
+        timestamp: A ``datetime.datetime``, a ``datetime.date``, or a string
+            starting with ``YYYY-MM-DD``, optionally followed by a time of
+            day separated by a space or by ``T``.
+
+    Returns:
+        Day, quoted month mnemonic and year separated by single spaces.
+        For datetimes whose time of day is not midnight, a space and
+        ``HH:MM:SS`` are appended.
+
+    Raises:
+        ValueError: If a string does not start with a ``YYYY-MM-DD`` shaped
+            date.
+        TypeError: If the argument is not a string, datetime or date.
+    """
+    if isinstance(timestamp, str):
+        # Accept both "YYYY-MM-DD HH:MM:SS" and "YYYY-MM-DDTHH:MM:SS"; only
+        # the leading date part is shape-checked here.
+        datepart = timestamp.replace("T", " ").split(" ")[0]
+        if list(map(len, datepart.split("-"))) != [4, 2, 2]:
+            # Need this as dateutil.parser.isoparse() is not in Python 3.6.
+            raise ValueError("Use ISO-format for dates")
+        timestamp = dateutil.parser.parse(timestamp)  # noqa  (py36 flake8 bug)
+    if not isinstance(timestamp, (datetime.datetime, datetime.date)):
+        raise TypeError("Require string or datetime")
+    string = f"{timestamp.day} '{NUM2ECLMONTH[timestamp.month]}' {timestamp.year}"
+    if isinstance(timestamp, datetime.datetime):
+        clock = timestamp.strftime("%H:%M:%S")
+        # Midnight is left out, so the result is a pure date.
+        if clock != "00:00:00":
+            string += " " + clock
+    return string
 
 
 def ecl_keyworddata_to_df(
@@ -312,6 +331,7 @@ def parse_opmio_deckrecord(
                     # OPM DeckItem. A better solution has not yet
                     # been found in the OPM API. See also
                     # https://github.com/OPM/opm-common/issues/2598
+                    # pylint: disable=protected-access
                     if record[item_idx].__defaulted(idx):
                         rec_dict[item_name][idx] = np.nan
         else:
@@ -496,7 +516,13 @@ def df2ecl(
     consecutive: Optional[str] = None,
     filename: Optional[str] = None,
 ) -> str:
-    """Generate Eclipse include strings from dataframes in ecl2df format
+    """Generate Eclipse include strings from dataframes in ecl2df format.
+
+    This function hands over the actual text generation pr. keyword
+    to functions named df2ecl_<keywordname> in the calling module.
+
+    These functions may again use generic_ecltable() from this module
+    for the actual string construction.
 
     Args:
         dataframe: Dataframe with Eclipse data on ecl2df format.
@@ -598,6 +624,129 @@ def df2ecl(
     return string
 
 
+def generic_ecltable(
+    dframe: pd.DataFrame,
+    keyword: str,
+    comment: Optional[str] = None,
+    renamer: Optional[Dict[str, str]] = None,
+    drop_trailing_columns: bool = True,
+) -> str:
+    """Construct a typical Eclipse table for data following
+    a keyword. Each row (record in Eclipse terms) ends with a slash.
+
+    This function will *not* add a final slash after all rows, as
+    this is keyword dependent. Some keywords require it, some keywords
+    require it to not be there.
+
+    The header is printed as a comment, with header names taken
+    from the dataframe.
+
+    The renamer is a map that is used to translate your dataframe column
+    names into opm.common item names, and the dictionary should map
+    from opm.common names into your chosen ones. If you have standard named
+    dataframe columns, the renamer is only applied to the column header comment.
+
+    Trailing columns that are all defaulted (that is either np.nan, None)
+    or consisting of only "1*" will be dropped when drop_trailing_columns
+    is True, as Eclipse will always interpret that as "1*".
+
+    Args:
+        dframe: Data for the keyword, one row per record. It is copied
+            before being modified.
+        keyword: Keyword name, used as the first output line and as the key
+            into OPMKEYWORDS for item names and value types.
+        comment: Optional text, each line is emitted as a "-- " comment
+            directly after the keyword line.
+        renamer: Optional map from opm.common item names to the column
+            names used in dframe.
+        drop_trailing_columns: Whether to remove rightmost columns that only
+            contain defaulted values.
+
+    Returns:
+        The keyword line, the optional comment lines, and, if the dataframe
+        has rows and at least one column known to the keyword, a commented
+        header line followed by one slash-terminated line per row.
+    """
+
+    # Start building the string we are to return:
+    string = keyword + "\n"
+    if comment:
+        string += "\n".join(["-- " + line for line in comment.splitlines()]) + "\n"
+
+    # Empty tables are ok with Eclipse (at least sometimes)
+    if dframe.empty:
+        return string
+
+    # Ensure we work on a copy as we are going to modify it in order to have
+    # Pandas make a pretty txt table:
+    dframe = dframe.copy()
+
+    # Column names are per ecl2df standard, redo to opm.common in order to use
+    # sorting from that:
+    if renamer is not None:
+        inv_renamer = {value: key for key, value in renamer.items()}
+        dframe.rename(inv_renamer, axis="columns", inplace=True)
+
+    keyword_items = OPMKEYWORDS[keyword]["items"]
+    keyword_col_headers = [item["name"] for item in keyword_items]
+
+    rightmost_column = max(
+        (
+            keyword_col_headers.index(item)
+            for item in set(dframe.columns).intersection(keyword_col_headers)
+        ),
+        default=-1,
+    )
+    if rightmost_column == -1:
+        # No relevant data in the dataframe
+        return string
+    relevant_columns = keyword_col_headers[: rightmost_column + 1]
+    for colname in relevant_columns:
+        # Add those that are missing, as Eclipse defaults
+        if colname not in dframe:
+            dframe[colname] = "1*"
+
+    # Reorder and slice columns. NaN or Nones are assumed to be defaulted,
+    # which in Eclipse terminology is the string "1*". The non-inplace
+    # fillna() returns a fresh frame, so the assignments below do not act
+    # on a slice of the copy made above.
+    dframe = dframe[relevant_columns].fillna(value="1*")
+
+    if drop_trailing_columns:
+        for col_name in reversed(relevant_columns):
+            if set(dframe[col_name].to_numpy()) == {"1*"}:
+                del dframe[col_name]
+            else:
+                break
+
+    # It is critical for opm.common, maybe also E100 to have integers printed
+    # as integers, for correct parsing. Ensure these are integer where the json
+    # says integer before we convert them to strings:
+    integer_cols = {
+        item["name"] for item in keyword_items if item["value_type"] == "INT"
+    }
+    for int_col in integer_cols.intersection(dframe.columns):
+        defaulted_rows = dframe[int_col] == "1*"
+        int_strings = dframe.loc[~defaulted_rows, int_col].astype(int).astype(str)
+        # Make the column object-typed before writing strings into it, so the
+        # assignment does not depend on implicit upcasting of numeric columns.
+        dframe[int_col] = dframe[int_col].astype(object)
+        dframe.loc[~defaulted_rows, int_col] = int_strings
+
+    # Quote all string data. This is not always needed, but needed
+    # for some columns, for example well-names containing a slash.
+    string_cols = {
+        item["name"] for item in keyword_items if item["value_type"] == "STRING"
+    }
+    for str_col in string_cols.intersection(dframe.columns):
+        # Ensure 1* is not quoted.
+        non_defaulted_rows = dframe[str_col] != "1*"
+        # Strip quotes that are already present before adding our own, so
+        # that values which arrive quoted are not quoted twice.
+        unquoted = (
+            dframe.loc[non_defaulted_rows, str_col]
+            .astype(str)
+            .str.replace("'", "", regex=False)
+        )
+        dframe.loc[non_defaulted_rows, str_col] = "'" + unquoted + "'"
+
+    # Now rename again to have prettier column names:
+    if renamer is not None:
+        dframe.rename(renamer, axis="columns", inplace=True)
+    # Add a final column with the end-slash, invisible header:
+    dframe[" "] = "/"
+    tablestring = dframe.to_string(header=True, index=False)
+    # Indent all lines with two spaces:
+    tablestring = "\n".join(
+        ["  " + line.strip().replace("  /", " /") for line in tablestring.splitlines()]
+        # The replace() in there is needed for py36/pandas==1.1.5 only.
+    )
+    # Eclipse comment for the header line:
+    tablestring = "--" + tablestring[1:]
+    return string + tablestring + "\n"
+
+
 def runlength_eclcompress(string: str, sep: str = "  ") -> str:
     """Compress a string of space-separated elements so that
 

diff --git a/ecl2df/equil.py b/ecl2df/equil.py
@@ -418,41 +418,13 @@ def df2ecl_equil(dframe: pd.DataFrame, comment: Optional[str] = None) -> str:
 
     phases = phases_from_columns(subset.columns)
 
-    # Make a copy as we are going to modify it in order to have Pandas
-    # make a pretty txt table:
-    equildf = subset.copy()
-    # Column names are pr. ec2ldf standard, redo to opm.common in order
-    # to use sorting from that:
-    inv_renamer = {value: key for key, value in RENAMERS[phases].items()}
-    # print(inv_renamer)
-    equildf.rename(inv_renamer, axis="columns", inplace=True)
-    col_headers = [item["name"] for item in common.OPMKEYWORDS["EQUIL"]["items"]]
-    for colname in col_headers:
-        # Add those that are missing, as Eclipse defaults
-        if colname not in equildf:
-            equildf[colname] = "1*"
-    # Reorder columns:
-    equildf = equildf[col_headers]
-
-    # It is critical for opm.common, maybe also E100 to have integers printed
-    # as integers, for correct parsing. Ensure integer types where
-    # the json says integer:
-    integer_cols = [
-        item["name"]
-        for item in common.OPMKEYWORDS["EQUIL"]["items"]
-        if item["value_type"] == "INT"
-    ]
-    for int_col in integer_cols:
-        # But allow these columns to contain "1*"
-        if set(equildf[int_col]) != {"1*"}:
-            equildf[int_col] = equildf[int_col].astype(int)
-
-    # Now rename again to have prettier column names:
-    equildf.rename(RENAMERS[phases], axis="columns", inplace=True)
-    # Add a final column with the end-slash, invisible header:
-    equildf[" "] = "/"
-    string += "-- " + equildf.to_string(header=True, index=False)
-    return string + "\n\n"
+    return common.generic_ecltable(
+        subset,
+        "EQUIL",
+        renamer=RENAMERS[phases],  # type: ignore
+        comment=comment,
+        drop_trailing_columns=False,
+    )
 
 
 def df2ecl_rsvd(dframe: pd.DataFrame, comment: Optional[str] = None) -> str: