Merge branch 'master' into havbconc

equinor · Apr 27, 2020 · 24b8438 · 24b8438
2 parents 8bff218 + d5c95bd
commit 24b8438
Show file tree

Hide file tree

Showing 11 changed files with 118 additions and 54 deletions.
diff --git a/.github/workflows/fmu-ensemble.yml b/.github/workflows/fmu-ensemble.yml
@@ -38,6 +38,7 @@ jobs:
       - name: Install fmu-ensemble with dependencies
         run: |
           pip install --upgrade pip
+          pip install libecl
           pip install .
 
       - name: Install test dependencies

diff --git a/setup.py b/setup.py
@@ -16,11 +16,12 @@
 with open("README.rst") as readme_file:
     readme = readme_file.read()
 
-with open("HISTORY.rst") as history_file:
-    history = history_file.read()
+with open("HISTORY.rst", "rb") as history_file:
+    # HISTORY.rst contains Norwegian characters; read bytes and decode as UTF-8
+    history = history_file.read().decode("UTF-8")
 
 REQUIREMENTS = [
-    "libecl",
+    # "libecl",   # Temporarily removed from requirements to solve problems elsewhere
     "numpy",
     "pandas>0.23.0",
     "pyyaml>=5.1",

diff --git a/src/fmu/ensemble/ensemble.py b/src/fmu/ensemble/ensemble.py
@@ -797,14 +797,15 @@ def load_smry(
                 memory simultaneously
             start_date (str or date): First date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included. If string, use
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'. If string, use
                 ISO-format, YYYY-MM-DD.
             end_date (str or date): Last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'. If string, use ISO-format, YYYY-MM-DD.
-            include_restart (boolean): boolean sent to libecl for wheter restarts
-                files should be traversed
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
+            include_restart (boolean): boolean sent to libecl for whether restart
+                files should be traversed.
         Returns:
             pd.DataFrame: Summary vectors for the ensemble, or
             a dict of dataframes if stacked=False.
@@ -1079,21 +1080,23 @@ def get_smry_dates(
                yield the sorted union of all valid timesteps for
                all realizations. Other valid options are
                'daily', 'monthly' and 'yearly'.
+               'first' will give out the first date (minimum).
                'last' will give out the last date (maximum).
             normalize:  Whether to normalize backwards at the start
                 and forwards at the end to ensure the raw
                 date range is covered.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overrides
-                normalized dates. If string, use ISO-format, YYYY-MM-DD.
+                normalized dates. Overridden if freq is 'first' or 'last'.
+                If string, use ISO-format, YYYY-MM-DD.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
                 end_date will always be included. Overrides
-                normalized dates. Overriden if freq is 'last'.
+                normalized dates. Overridden if freq is 'first' or 'last'.
                 If string, use ISO-format, YYYY-MM-DD.
-            include_restart: boolean sent to libecl for wheter restarts
-                files should be traversed
+            include_restart: boolean sent to libecl for whether restart
+                files should be traversed.
 
         Returns:
             list of datetimes. Empty list if no data found.
@@ -1164,6 +1167,9 @@ def _get_smry_dates(eclsumsdates, freq, normalize, start_date, end_date):
         if freq == "last":
             end_date = max([max(x) for x in eclsumsdates]).date()
             return [end_date]
+        if freq == "first":
+            start_date = min([min(x) for x in eclsumsdates]).date()
+            return [start_date]
         # These are datetime.datetime, not datetime.date
         start_smry = min([min(x) for x in eclsumsdates])
         end_smry = max([max(x) for x in eclsumsdates])
@@ -1235,12 +1241,12 @@ def get_smry_stats(
                 object in memory after data has been loaded.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included. If string,
-                use ISO-format, YYYY-MM-DD.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'. If string, use ISO-format, YYYY-MM-DD.
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
         Returns:
             A MultiIndex dataframe. Outer index is 'minimum', 'maximum',
             'mean', 'p10', 'p90', inner index are the dates. Column names
@@ -1541,13 +1547,14 @@ def get_smry(
                 not enough memory to keep all summary files in memory.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'.
-            include_restart: boolean sent to libecl for wheter restarts
-                files should be traversed
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
+            include_restart: boolean sent to libecl for whether restart
+                files should be traversed.
 
         Returns:
             A DataFrame of summary vectors for the ensemble. The column

diff --git a/src/fmu/ensemble/ensembleset.py b/src/fmu/ensemble/ensembleset.py
@@ -582,11 +582,12 @@ def load_smry(
                 memory simultaneously
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'.
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
 
         Returns:
             A DataFrame of summary vectors for the ensembleset.
@@ -632,11 +633,12 @@ def get_smry(
                 operations
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'.
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
         Returns:
             A DataFrame of summary vectors for the EnsembleSet. The column
             ENSEMBLE will distinguish the different ensembles by their
@@ -672,11 +674,12 @@ def get_smry_dates(
                 memory simultaneously
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'.
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
         Returns:
             list of datetime.date.
         """

diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py
@@ -490,6 +490,7 @@ def _clean_observations(self):
                         "report",
                         "yearly",
                         "daily",
+                        "first",
                         "last",
                         "monthly",
                     ] and not isinstance(unit["time_index"], datetime.datetime):

diff --git a/src/fmu/ensemble/realization.py b/src/fmu/ensemble/realization.py
@@ -962,8 +962,8 @@ def get_eclsum(self, cache=True, include_restart=True):
             cache: boolean indicating whether we should keep an
                 object reference to the EclSum object. Set to
                 false if you need to conserve memory.
-            include_restart: boolean sent to libecl for whether restarts
-                files should be traversed
+            include_restart: boolean sent to libecl for whether restart
+                files should be traversed.
 
         Returns:
             EclSum: object representing the summary file. None if
@@ -1036,8 +1036,8 @@ def load_smry(
 
           'share/results/tables/unsmry--<time_index>.csv'
 
-        where <time_index> is among 'yearly', 'monthly', 'daily', 'last' or
-        'raw' (meaning the raw dates in the SMRY file), depending
+        where <time_index> is among 'yearly', 'monthly', 'daily', 'first',
+        'last' or 'raw' (meaning the raw dates in the SMRY file), depending
         on the chosen time_index. If a custom time_index (list
         of datetime) was supplied, <time_index> will be called 'custom'.
 
@@ -1047,22 +1047,23 @@ def load_smry(
 
         Args:
             time_index: string indicating a resampling frequency,
-               'yearly', 'monthly', 'daily', 'last' or 'raw', the latter will
-               return the simulated report steps (also default).
+               'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the
+               latter will return the simulated report steps (also default).
                If a list of DateTime is supplied, data will be resampled
                to these.
             column_keys: list of column key wildcards. None means everything.
             cache_eclsum: boolean for whether to keep the loaded EclSum
                 object in memory after data has been loaded.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'.
-            include_restart: boolean sent to libecl for wheter restarts
-                files should be traversed
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
+            include_restart: boolean sent to libecl for whether restart
+                files should be traversed.
 
         Returns:
             DataFrame with summary keys as columns and dates as indices.
@@ -1125,8 +1126,8 @@ def get_smry(
 
         Arguments:
             time_index: string indicating a resampling frequency,
-               'yearly', 'monthly', 'daily', 'last' or 'raw', the latter will
-               return the simulated report steps (also default).
+               'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the
+               latter will return the simulated report steps (also default).
                If a list of DateTime is supplied, data will be resampled
                to these. If a date in ISO-8601 format is supplied, that is
                used as a single date.
@@ -1135,11 +1136,12 @@ def get_smry(
                 object in memory after data has been loaded.
             start_date: str or date with first date to include.
                 Dates prior to this date will be dropped, supplied
-                start_date will always be included.
+                start_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
-                end_date will always be included. Overriden if time_index
-                is 'last'.
+                end_date will always be included. Overridden if time_index
+                is 'first' or 'last'.
 
         Returns empty dataframe if there is no summary file, or if the
         column_keys are not existing.
@@ -1438,19 +1440,22 @@ def get_smry_dates(
                 yield the sorted union of all valid timesteps for
                 all realizations. Other valid options are
                 'daily', 'monthly' and 'yearly'.
+                'first' will give out the first date (minimum) and
                 'last' will give out the last date (maximum),
-                as a list with one element.
+                both as lists with one element.
             normalize: Whether to normalize backwards at the start
                 and forwards at the end to ensure the raw
                 date range is covered.
             start_date: str or date with first date to include
                 Dates prior to this date will be dropped, supplied
                 start_date will always be included. Overrides
-                normalized dates.
+                normalized dates. Overridden if freq is 'first'
+                or 'last'.
             end_date: str or date with last date to be included.
                 Dates past this date will be dropped, supplied
                 end_date will always be included. Overrides
-                normalized dates. Overriden if freq is 'last'.
+                normalized dates. Overridden if freq is 'first'
+                or 'last'.
         Returns:
             list of datetimes. None if no summary data is available.
         """
@@ -1808,7 +1813,7 @@ def normalize_dates(start_date, end_date, freq):
     elif freq == "daily":
         # This we don't need to normalize, but we should not give any warnings
         pass
-    elif freq == "last":
+    elif freq == "first" or freq == "last":
         # This we don't need to normalize, but we should not give any warnings
         pass
     else:

diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py
@@ -463,8 +463,9 @@ def get_smry_dates(self, freq="monthly", normalize=False):
             freq: string denoting requested frequency for
                 the list of datetimes.
                 'daily', 'monthly' and 'yearly'.
+                'first' will give out the first date (minimum) and
                 'last' will give out the last date (maximum),
-                as a list with one element.
+                both as lists with one element.
             normalize: Whether to normalize backwards at the start
                 and forwards at the end to ensure the entire
                 date range is covered.
@@ -490,6 +491,10 @@ def get_smry_dates(self, freq="monthly", normalize=False):
         available_dates = [pd.to_datetime(x) for x in list(available_dates)]
         start_date = min(available_dates)
         end_date = max(available_dates)
+        if freq == "first":
+            return [start_date.date()]
+        if freq == "last":
+            return [end_date.date()]
         pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"}
         if normalize:
             raise NotImplementedError
@@ -565,7 +570,10 @@ def _glob_smry_keys(self, column_keys):
         available_smry = [x for x in self.keys() if "unsmry" in x]
 
         if not available_smry:
-            raise ValueError("No summary data to glob from")
+            raise ValueError(
+                "No summary data available. Use load_smry() "
+                "before making a virtual realization."
+            )
 
         # Merge all internalized columns:
         available_keys = set()

diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py
@@ -392,16 +392,21 @@ def test_ensemble_ecl():
     assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
     assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
     assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
+    assert len(reekensemble.get_smry_dates(freq="first")) == 1
     assert len(reekensemble.get_smry_dates(freq="last")) == 1
+    assert reekensemble.get_smry_dates(freq="first") == reekensemble.get_smry_dates(
+        freq="first", start_date="1900-01-01", end_date="2050-02-01"
+    )
     assert reekensemble.get_smry_dates(freq="last") == reekensemble.get_smry_dates(
-        freq="last", end_date="2050-02-01"
+        freq="last", start_date="1900-01-01", end_date="2050-02-01"
     )
 
     assert str(reekensemble.get_smry_dates(freq="report")[-1]) == "2003-01-02 00:00:00"
     assert str(reekensemble.get_smry_dates(freq="raw")[-1]) == "2003-01-02 00:00:00"
     assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
     assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
     assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
+    assert str(reekensemble.get_smry_dates(freq="first")[-1]) == "2000-01-01"
     assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"
 
     assert (
@@ -431,6 +436,9 @@ def test_ensemble_ecl():
     # Check that we can shortcut get_smry_dates:
     assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly")) == 25
 
+    assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="first")) == 5
+    assert isinstance(reekensemble.get_df("unsmry--first.csv"), pd.DataFrame)
+
     assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last")) == 5
     assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)
 

diff --git a/tests/test_realization.py b/tests/test_realization.py
@@ -546,7 +546,7 @@ def test_singlereal_ecl(tmp="TMP"):
         )
         == 2
     )
-    # Date normalization should be overriden here:
+    # Date normalization should be overridden here:
     assert (
         len(
             real.get_smry_dates(

diff --git a/tests/test_virtualensemble.py b/tests/test_virtualensemble.py
@@ -92,6 +92,19 @@ def test_virtualensemble():
     assert "REAL" in fopt.columns
     assert "FGPT" not in fopt.columns
     assert len(fopt) == 25
+    monthly_smry = vens.get_smry(time_index="monthly")
+    pd.testing.assert_series_equal(
+        vens.get_smry(time_index="first")["FOIP"].reset_index(drop=True),
+        monthly_smry[monthly_smry["DATE"] == min(monthly_smry["DATE"])][
+            "FOIP"
+        ].reset_index(drop=True),
+    )
+    pd.testing.assert_series_equal(
+        vens.get_smry(time_index="last")["FOIP"].reset_index(drop=True),
+        monthly_smry[monthly_smry["DATE"] == max(monthly_smry["DATE"])][
+            "FOIP"
+        ].reset_index(drop=True),
+    )
 
     # Check that we can default get_smry()
     alldefaults = vens.get_smry()