Skip to content

Commit

Permalink
Merge branch 'master' into havbconc
Browse files Browse the repository at this point in the history
  • Loading branch information
berland authored Apr 27, 2020
2 parents 8bff218 + d5c95bd commit 24b8438
Show file tree
Hide file tree
Showing 11 changed files with 118 additions and 54 deletions.
1 change: 1 addition & 0 deletions .github/workflows/fmu-ensemble.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ jobs:
- name: Install fmu-ensemble with dependencies
run: |
pip install --upgrade pip
pip install libecl
pip install .
- name: Install test dependencies
Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
with open("README.rst") as readme_file:
readme = readme_file.read()

with open("HISTORY.rst") as history_file:
history = history_file.read()
with open("HISTORY.rst", "rb") as history_file:
# Norwegian characters in HISTORY.rst
history = history_file.read().decode("UTF-8")

REQUIREMENTS = [
"libecl",
# "libecl", # Temporarily removed from requirements to solve problems elsewhere
"numpy",
"pandas>0.23.0",
"pyyaml>=5.1",
Expand Down
43 changes: 25 additions & 18 deletions src/fmu/ensemble/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,14 +797,15 @@ def load_smry(
memory simultaneously
start_date (str or date): First date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. If string, use
start_date will always be included. Overridden if time_index
is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
ISO-format, YYYY-MM-DD.
end_date (str or date): Last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'. If string, use ISO-format, YYYY-MM-DD.
include_restart (boolean): boolean sent to libecl for wheter restarts
files should be traversed
end_date will always be included. Overridden if time_index
is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
include_restart (boolean): boolean sent to libecl for whether restart
files should be traversed.
Returns:
pd.DataFrame: Summary vectors for the ensemble, or
a dict of dataframes if stacked=False.
Expand Down Expand Up @@ -1079,21 +1080,23 @@ def get_smry_dates(
yield the sorted union of all valid timesteps for
all realizations. Other valid options are
'daily', 'monthly' and 'yearly'.
'first' will give out the first date (minimum).
'last' will give out the last date (maximum).
normalize: Whether to normalize backwards at the start
and forwards at the end to ensure the raw
date range is covered.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. Overrides
normalized dates. If string, use ISO-format, YYYY-MM-DD.
normalized dates. Overridden if freq is 'first' or 'last'.
If string, use ISO-format, YYYY-MM-DD.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overrides
normalized dates. Overriden if freq is 'last'.
normalized dates. Overridden if freq is 'first' or 'last'.
If string, use ISO-format, YYYY-MM-DD.
include_restart: boolean sent to libecl for wheter restarts
files should be traversed
include_restart: boolean sent to libecl for whether restart
files should be traversed.
Returns:
list of datetimes. Empty list if no data found.
Expand Down Expand Up @@ -1164,6 +1167,9 @@ def _get_smry_dates(eclsumsdates, freq, normalize, start_date, end_date):
if freq == "last":
end_date = max([max(x) for x in eclsumsdates]).date()
return [end_date]
if freq == "first":
start_date = min([min(x) for x in eclsumsdates]).date()
return [start_date]
# These are datetime.datetime, not datetime.date
start_smry = min([min(x) for x in eclsumsdates])
end_smry = max([max(x) for x in eclsumsdates])
Expand Down Expand Up @@ -1235,12 +1241,12 @@ def get_smry_stats(
object in memory after data has been loaded.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included. If string,
use ISO-format, YYYY-MM-DD.
start_date will always be included. Overridden if time_index
is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'. If string, use ISO-format, YYYY-MM-DD.
end_date will always be included. Overridden if time_index
is 'first' or 'last'. If string, use ISO-format, YYYY-MM-DD.
Returns:
A MultiIndex dataframe. Outer index is 'minimum', 'maximum',
'mean', 'p10', 'p90', inner index are the dates. Column names
Expand Down Expand Up @@ -1541,13 +1547,14 @@ def get_smry(
not enough memory to keep all summary files in memory.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included.
start_date will always be included. Overridden if time_index
is 'first' or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'.
include_restart: boolean sent to libecl for wheter restarts
files should be traversed
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
include_restart: boolean sent to libecl for whether restart
files should be traversed.
Returns:
A DataFrame of summary vectors for the ensemble. The column
Expand Down
21 changes: 12 additions & 9 deletions src/fmu/ensemble/ensembleset.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,11 +582,12 @@ def load_smry(
memory simultaneously
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included.
start_date will always be included. Overridden if time_index
is 'first' or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'.
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
Returns:
A DataFrame of summary vectors for the ensembleset.
Expand Down Expand Up @@ -632,11 +633,12 @@ def get_smry(
operations
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included.
start_date will always be included. Overridden if time_index
is 'first' or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'.
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
Returns:
A DataFrame of summary vectors for the EnsembleSet. The column
ENSEMBLE will distinguish the different ensembles by their
Expand Down Expand Up @@ -672,11 +674,12 @@ def get_smry_dates(
memory simultaneously
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included.
start_date will always be included. Overridden if time_index
is 'first' or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'.
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
Returns:
list of datetime.date.
"""
Expand Down
1 change: 1 addition & 0 deletions src/fmu/ensemble/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,7 @@ def _clean_observations(self):
"report",
"yearly",
"daily",
"first",
"last",
"monthly",
] and not isinstance(unit["time_index"], datetime.datetime):
Expand Down
45 changes: 25 additions & 20 deletions src/fmu/ensemble/realization.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,8 +962,8 @@ def get_eclsum(self, cache=True, include_restart=True):
cache: boolean indicating whether we should keep an
object reference to the EclSum object. Set to
false if you need to conserve memory.
include_restart: boolean sent to libecl for whether restarts
files should be traversed
include_restart: boolean sent to libecl for whether restart
files should be traversed.
Returns:
EclSum: object representing the summary file. None if
Expand Down Expand Up @@ -1036,8 +1036,8 @@ def load_smry(
'share/results/tables/unsmry--<time_index>.csv'
where <time_index> is among 'yearly', 'monthly', 'daily', 'last' or
'raw' (meaning the raw dates in the SMRY file), depending
where <time_index> is among 'yearly', 'monthly', 'daily', 'first',
'last' or 'raw' (meaning the raw dates in the SMRY file), depending
on the chosen time_index. If a custom time_index (list
of datetime) was supplied, <time_index> will be called 'custom'.
Expand All @@ -1047,22 +1047,23 @@ def load_smry(
Args:
time_index: string indicating a resampling frequency,
'yearly', 'monthly', 'daily', 'last' or 'raw', the latter will
return the simulated report steps (also default).
'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the
latter will return the simulated report steps (also default).
If a list of DateTime is supplied, data will be resampled
to these.
column_keys: list of column key wildcards. None means everything.
cache_eclsum: boolean for whether to keep the loaded EclSum
object in memory after data has been loaded.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included.
start_date will always be included. Overridden if time_index
is 'first' or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'.
include_restart: boolean sent to libecl for wheter restarts
files should be traversed
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
include_restart: boolean sent to libecl for whether restart
files should be traversed.
Returns:
DataFrame with summary keys as columns and dates as indices.
Expand Down Expand Up @@ -1125,8 +1126,8 @@ def get_smry(
Arguments:
time_index: string indicating a resampling frequency,
'yearly', 'monthly', 'daily', 'last' or 'raw', the latter will
return the simulated report steps (also default).
'yearly', 'monthly', 'daily', 'first', 'last' or 'raw', the
latter will return the simulated report steps (also default).
If a list of DateTime is supplied, data will be resampled
to these. If a date in ISO-8601 format is supplied, that is
used as a single date.
Expand All @@ -1135,11 +1136,12 @@ def get_smry(
object in memory after data has been loaded.
start_date: str or date with first date to include.
Dates prior to this date will be dropped, supplied
start_date will always be included.
start_date will always be included. Overridden if time_index
is 'first' or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overriden if time_index
is 'last'.
end_date will always be included. Overridden if time_index
is 'first' or 'last'.
Returns empty dataframe if there is no summary file, or if the
column_keys are not existing.
Expand Down Expand Up @@ -1438,19 +1440,22 @@ def get_smry_dates(
yield the sorted union of all valid timesteps for
all realizations. Other valid options are
'daily', 'monthly' and 'yearly'.
'first' will give out the first date (minimum) and
'last' will give out the last date (maximum),
as a list with one element.
both as lists with one element.
normalize: Whether to normalize backwards at the start
and forwards at the end to ensure the raw
date range is covered.
start_date: str or date with first date to include
Dates prior to this date will be dropped, supplied
start_date will always be included. Overrides
normalized dates.
normalized dates. Overridden if freq is 'first'
or 'last'.
end_date: str or date with last date to be included.
Dates past this date will be dropped, supplied
end_date will always be included. Overrides
normalized dates. Overriden if freq is 'last'.
normalized dates. Overridden if freq is 'first'
or 'last'.
Returns:
list of datetimes. None if no summary data is available.
"""
Expand Down Expand Up @@ -1808,7 +1813,7 @@ def normalize_dates(start_date, end_date, freq):
elif freq == "daily":
# This we don't need to normalize, but we should not give any warnings
pass
elif freq == "last":
elif freq == "first" or freq == "last":
# This we don't need to normalize, but we should not give any warnings
pass
else:
Expand Down
12 changes: 10 additions & 2 deletions src/fmu/ensemble/virtualrealization.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,9 @@ def get_smry_dates(self, freq="monthly", normalize=False):
freq: string denoting requested frequency for
the list of datetimes.
'daily', 'monthly' and 'yearly'.
'first' will give out the first date (minimum) and
'last' will give out the last date (maximum),
as a list with one element.
both as lists with one element.
normalize: Whether to normalize backwards at the start
and forwards at the end to ensure the entire
date range is covered.
Expand All @@ -490,6 +491,10 @@ def get_smry_dates(self, freq="monthly", normalize=False):
available_dates = [pd.to_datetime(x) for x in list(available_dates)]
start_date = min(available_dates)
end_date = max(available_dates)
if freq == "first":
return [start_date.date()]
if freq == "last":
return [end_date.date()]
pd_freq_mnenomics = {"monthly": "MS", "yearly": "YS", "daily": "D"}
if normalize:
raise NotImplementedError
Expand Down Expand Up @@ -565,7 +570,10 @@ def _glob_smry_keys(self, column_keys):
available_smry = [x for x in self.keys() if "unsmry" in x]

if not available_smry:
raise ValueError("No summary data to glob from")
raise ValueError(
"No summary data available. Use load_smry() "
"before making a virtual realization."
)

# Merge all internalized columns:
available_keys = set()
Expand Down
10 changes: 9 additions & 1 deletion tests/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,16 +392,21 @@ def test_ensemble_ecl():
assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
assert len(reekensemble.get_smry_dates(freq="first")) == 1
assert len(reekensemble.get_smry_dates(freq="last")) == 1
assert reekensemble.get_smry_dates(freq="first") == reekensemble.get_smry_dates(
freq="first", start_date="1900-01-01", end_date="2050-02-01"
)
assert reekensemble.get_smry_dates(freq="last") == reekensemble.get_smry_dates(
freq="last", end_date="2050-02-01"
freq="last", start_date="1900-01-01", end_date="2050-02-01"
)

assert str(reekensemble.get_smry_dates(freq="report")[-1]) == "2003-01-02 00:00:00"
assert str(reekensemble.get_smry_dates(freq="raw")[-1]) == "2003-01-02 00:00:00"
assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
assert str(reekensemble.get_smry_dates(freq="first")[-1]) == "2000-01-01"
assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"

assert (
Expand Down Expand Up @@ -431,6 +436,9 @@ def test_ensemble_ecl():
# Check that we can shortcut get_smry_dates:
assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly")) == 25

assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="first")) == 5
assert isinstance(reekensemble.get_df("unsmry--first.csv"), pd.DataFrame)

assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last")) == 5
assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_realization.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def test_singlereal_ecl(tmp="TMP"):
)
== 2
)
# Date normalization should be overriden here:
# Date normalization should be overridden here:
assert (
len(
real.get_smry_dates(
Expand Down
13 changes: 13 additions & 0 deletions tests/test_virtualensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ def test_virtualensemble():
assert "REAL" in fopt.columns
assert "FGPT" not in fopt.columns
assert len(fopt) == 25
monthly_smry = vens.get_smry(time_index="monthly")
pd.testing.assert_series_equal(
vens.get_smry(time_index="first")["FOIP"].reset_index(drop=True),
monthly_smry[monthly_smry["DATE"] == min(monthly_smry["DATE"])][
"FOIP"
].reset_index(drop=True),
)
pd.testing.assert_series_equal(
vens.get_smry(time_index="last")["FOIP"].reset_index(drop=True),
monthly_smry[monthly_smry["DATE"] == max(monthly_smry["DATE"])][
"FOIP"
].reset_index(drop=True),
)

# Check that we can default get_smry()
alldefaults = vens.get_smry()
Expand Down
Loading

0 comments on commit 24b8438

Please sign in to comment.