Let summary.df be able to always return datetime index (#134)

* Let summary.df be able to always return datetime index

Co-authored-by: Asgeir Nyvoll <47146384+asnyv@users.noreply.github.com>
equinor · Apr 16, 2020 · 2d23627 · 2d23627
1 parent 8dab290
commit 2d23627
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 0 deletions.
diff --git a/ecl2df/summary.py b/ecl2df/summary.py
@@ -186,6 +186,7 @@ def df(
     include_restart=True,
     params=False,
     paramfile=None,
+    datetime=False,
 ):
     """
     Extract data from UNSMRY as Pandas dataframes.
@@ -214,6 +215,11 @@ def df(
             and merged with the summary data.
         paramsfile (str): Explicit path to parameters file if autodiscovery is
             not wanted.
+        datetime (bool): If True, the time index of the returned DataFrame
+            is always of datetime type. If not, it will be datetime
+            if raw dates are requested (which are at second accuracy),
+            or it will be strings in case of yearly, monthly or daily
+            time frequency.
 
     Returns empty dataframe if there is no summary file, or if the
     column_keys are not existing.
@@ -273,6 +279,9 @@ def df(
             # to dump to csv, it should not cause side-effects that floats end up
             # as strings in the dataframe.
             dframe[key] = str(param_dict[key])
+    if datetime:
+        if dframe.index.dtype == 'object':
+            dframe.index = pd.to_datetime(dframe.index)
     return dframe
 
 

diff --git a/tests/test_summary.py b/tests/test_summary.py
@@ -23,11 +23,19 @@ def test_summary2df():
     eclfiles = EclFiles(DATAFILE)
     sumdf = summary.df(eclfiles)
 
+    assert sumdf.index.name == "DATE"
+    assert sumdf.index.dtype == "datetime64[ns]" or sumdf.index.dtype == "datetime64"
+
     assert not sumdf.empty
     assert sumdf.index.name == "DATE"
     assert not sumdf.columns.empty
     assert "FOPT" in sumdf.columns
 
+    sumdf = summary.df(eclfiles, datetime=True)
+    # (datetime=True is superfluous when raw time reports are requested)
+    assert sumdf.index.name == "DATE"
+    assert sumdf.index.dtype == "datetime64[ns]" or sumdf.index.dtype == "datetime64"
+
 
 def test_summary2df_dates(tmpdir):
     """Test that we have some API possibilities with ISO dates"""
@@ -39,6 +47,10 @@ def test_summary2df_dates(tmpdir):
         end_date="2002-03-01",
         time_index="daily",
     )
+    assert sumdf.index.name == "DATE"
+    # This is the default when daily index is requested:
+    assert sumdf.index.dtype == "object"
+
     assert len(sumdf) == 59
     assert str(sumdf.index.values[0]) == "2002-01-02"
     assert str(sumdf.index.values[-1]) == "2002-03-01"
@@ -51,6 +63,16 @@ def test_summary2df_dates(tmpdir):
     assert len(sumdf) == 1
     assert str(sumdf.index.values[0]) == "2000-01-01"
 
+    sumdf = summary.df(
+        eclfiles,
+        start_date=datetime.date(2002, 1, 2),
+        end_date="2002-03-01",
+        time_index="daily",
+        datetime=True,
+    )
+    assert sumdf.index.name == "DATE"
+    assert sumdf.index.dtype == "datetime64[ns]" or sumdf.index.dtype == "datetime64"
+
     tmpcsvfile = tmpdir.join(".TMP-sum.csv")
     sys.argv = [
         "ecl2csv",