Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature #2612 forecast lead groups #2698

Merged
merged 28 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
98382e4
Refactor to prevent calling sys.exit from functions that could cause …
georgemccabe Jun 12, 2024
ef8520f
per #2596, ensure the same RUN_ID is set for a given METplus run even…
georgemccabe Jun 12, 2024
13da8d0
fixed linter complaints
georgemccabe Jun 12, 2024
e6d2f71
remove unused imports
georgemccabe Jun 12, 2024
e17cdb4
cleanup for SonarQube and linter
georgemccabe Jun 12, 2024
b8a43b2
remove unused imports
georgemccabe Jun 12, 2024
386c67f
import parent wrappers directly so that PyCharm can follow class hier…
georgemccabe Jun 12, 2024
f744c96
per #2612, add support for processing groups of forecast leads
georgemccabe Jun 12, 2024
027feca
remove call to prune_empty function and ci-run-all-diff
georgemccabe Jun 12, 2024
eb39f3a
fixed check on DataFrame to properly check for an empty one
georgemccabe Jun 12, 2024
2e1d3a4
remove prune_empty function that is no longer used
georgemccabe Jun 12, 2024
70c5913
update conda environment requirements for metdataio
georgemccabe Jun 18, 2024
d103d48
set label to Group<n> for LEAD_SEQ_<n> if LEAD_SEQ_<n>_LABEL is not s…
georgemccabe Jun 21, 2024
97f7bd2
increase timeout for request from 60 to 90
georgemccabe Jun 28, 2024
e8cdb61
handle exception and output an error message with a suggestion to rer…
georgemccabe Jun 28, 2024
4b22607
cleanup to address SonarQube complaints, move call to exit out of mai…
georgemccabe Jun 28, 2024
8484362
cleanup to appease SonarQube
georgemccabe Jun 28, 2024
d7b942e
per #2612, add support for creating groups of forecast leads by speci…
georgemccabe Jun 28, 2024
9c2e258
per #2612, add unit tests for function that gets groups of forecast l…
georgemccabe Jun 28, 2024
ab1f3db
various refactoring to satisfy SonarQube complaints
georgemccabe Jun 28, 2024
5231f4c
renamed config LEAD_SEQ_DIVISIONS to LEAD_SEQ_GROUP_SIZE and LEAD_SEQ…
georgemccabe Jun 28, 2024
fe1d748
per #2612, add documentation for new config variables including examples
georgemccabe Jun 28, 2024
d7288b6
fix empty check for DataFrame
georgemccabe Jun 28, 2024
597b952
fix rst typo
georgemccabe Jun 28, 2024
cb3349a
merge develop and resolved conflicts
georgemccabe Jul 1, 2024
d36faaa
minor cleanup to satisfy linter
georgemccabe Jul 3, 2024
c94c495
Per #2612, if runtime freq is RUN_ONCE_PER_LEAD and groups of forecas…
georgemccabe Jul 3, 2024
5df9937
Merge branch 'develop' into feature_2612_forecast_lead_groups
georgemccabe Sep 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/jobs/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def get_dockerhub_url(branch_name):
def docker_get_volumes_last_updated(current_branch):
import requests
dockerhub_url = get_dockerhub_url(current_branch)
dockerhub_request = requests.get(dockerhub_url, timeout=60)
dockerhub_request = requests.get(dockerhub_url, timeout=90)
if dockerhub_request.status_code != 200:
print(f"Could not find DockerHub URL: {dockerhub_url}")
return None
Expand Down
17 changes: 10 additions & 7 deletions .github/jobs/get_data_volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

import sys
import os
import subprocess
import shlex

from docker_utils import docker_get_volumes_last_updated, get_branch_name
from docker_utils import get_data_repo, DOCKERHUB_METPLUS_DATA_DEV
Expand Down Expand Up @@ -54,7 +52,12 @@ def main(args):
branch_name = branch_name[5:]

# get all docker data volumes associated with current branch
available_volumes = docker_get_volumes_last_updated(branch_name).keys()
try:
available_volumes = docker_get_volumes_last_updated(branch_name).keys()
except AttributeError:
print("ERROR: Could not get available Docker data volumes "
f"for {branch_name}. Try rerunning failed jobs in GitHub Actions")
return None

# loop through arguments and get data volume for each category
for model_app_name in args:
Expand All @@ -78,7 +81,7 @@ def main(args):
volume_name = f'{prefix}-{model_app_name}'

# add output- to model app name
model_app_name=f'output-{model_app_name}'
model_app_name = f'output-{model_app_name}'

# set DockerHub repo to dev version because all output data
# should be in dev repository
Expand Down Expand Up @@ -108,11 +111,11 @@ def main(args):

if __name__ == "__main__":
# split up command line args that have commas before passing into main
args = []
arg_list = []

for arg in sys.argv[1:]:
args.extend(arg.split(','))
out = main(args)
arg_list.extend(arg.split(','))
out = main(arg_list)
if out is None:
print("ERROR: Something went wrong")
sys.exit(1)
Expand Down
81 changes: 41 additions & 40 deletions .github/jobs/setup_and_run_use_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def main():
# use BuildKit to build image
os.environ['DOCKER_BUILDKIT'] = '1'

isOK = True
is_ok = True
failed_use_cases = []
for setup_commands, use_case_commands, requirements in all_commands:
# get environment image tag
Expand All @@ -79,45 +79,38 @@ def main():
f"-f {DOCKERFILE_DIR}/{dockerfile_name} ."
)

print(f'Building Docker environment/branch image...')
print('Building Docker environment/branch image...')
if not run_commands(docker_build_cmd):
isOK = False
is_ok = False
continue

commands = []

# print list of existing docker images
commands.append('docker images')

# print list of existing docker images,
# remove docker image after creating run env or prune untagged images
commands.append(f'docker image rm dtcenter/metplus-dev:{branch_name} -f')
commands.append('docker image prune -f')
run_commands(commands)

commands = []

# list docker images again after removal
commands.append('docker images')

# start interactive container in the background
commands.append(
f"docker run -d --rm -it -e GITHUB_WORKSPACE "
f"--name {RUN_TAG} "
f"{os.environ.get('NETWORK_ARG', '')} "
f"{' '.join(VOLUME_MOUNTS)} "
f"{volumes_from} --workdir {GITHUB_WORKSPACE} "
f'{RUN_TAG} bash'
)

# list running containers
commands.append('docker ps -a')

# execute setup commands in running docker container
commands.append(_format_docker_exec_command(setup_commands))
run_commands([
'docker images',
f'docker image rm dtcenter/metplus-dev:{branch_name} -f',
'docker image prune -f',
])

# list docker images again after removal,
# start interactive container in the background,
# list running containers,
# then execute setup commands in running docker container
commands = [
'docker images',
(f"docker run -d --rm -it -e GITHUB_WORKSPACE "
f"--name {RUN_TAG} "
f"{os.environ.get('NETWORK_ARG', '')} "
f"{' '.join(VOLUME_MOUNTS)} "
f"{volumes_from} --workdir {GITHUB_WORKSPACE} "
f'{RUN_TAG} bash'),
'docker ps -a',
_format_docker_exec_command(setup_commands),
]

# run docker commands and skip running cases if something went wrong
if not run_commands(commands):
isOK = False
is_ok = False

# force remove container if setup step fails
run_commands(f'docker rm -f {RUN_TAG}')
Expand All @@ -132,23 +125,28 @@ def main():
for use_case_command in use_case_commands:
if not run_commands(_format_docker_exec_command(use_case_command)):
failed_use_cases.append(use_case_command)
isOK = False
is_ok = False

# print bashrc file to see what was added by setup commands
# then force remove container to stop and remove it
if not run_commands([
_format_docker_exec_command('cat /root/.bashrc'),
f'docker rm -f {RUN_TAG}',
]):
isOK = False
is_ok = False

# print summary of use cases that failed
for failed_use_case in failed_use_cases:
print(f'ERROR: Use case failed: {failed_use_case}')
_print_failed_use_cases(failed_use_cases)

if not isOK:
if not is_ok:
print("ERROR: Some commands failed.")
sys.exit(1)

return is_ok


def _print_failed_use_cases(failed_use_cases):
for failed_use_case in failed_use_cases:
print(f'ERROR: Use case failed: {failed_use_case}')


def _format_docker_exec_command(command):
Expand Down Expand Up @@ -201,4 +199,7 @@ def _get_dockerfile_name(requirements):


if __name__ == '__main__':
main()
ret = main()
# exit non-zero if anything went wrong
if not ret:
sys.exit(1)
21 changes: 17 additions & 4 deletions docs/Users_Guide/glossary.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2244,12 +2244,25 @@ METplus Configuration Glossary
LEAD_SEQ_<n>
Specify the sequence of forecast lead times to include in the analysis. Comma-separated list format, e.g.: 0, 6, 12. <n> corresponds to the bin in which the user wishes to aggregate series by lead results.

| *Used by:* SeriesAnalysis
| *Used by:* All

LEAD_SEQ_<n>_LABEL
Required when SERIES_BY_LEAD_GROUP_FCSTS=True. Specify the label of the corresponding bin of series by lead results.
Specify the label for the :term:`LEAD_SEQ_\<n>` group of forecast leads.

| *Used by:* SeriesAnalysis
| *Used by:* All

LEAD_SEQ_GROUP_SIZE
Defines the size of forecast lead groups to create from :term:`LEAD_SEQ`.
See :ref:`grouping_forecast_leads` for more information.

| *Used by:* All

LEAD_SEQ_GROUP_LABEL
Defines the label to apply to each forecast lead group that is created
using :term:`LEAD_SEQ` and :term:`LEAD_SEQ_GROUP_SIZE`.
See :ref:`grouping_forecast_leads` for more information.

| *Used by:* All

LINE_TYPE
.. warning:: **DEPRECATED:** Please use :term:`LINE_TYPE_LIST` instead.
Expand Down Expand Up @@ -3545,7 +3558,7 @@ METplus Configuration Glossary
.. warning:: **DEPRECATED:** Please use :term:`LEAD_SEQ_\<n>` and :term:`SERIES_ANALYSIS_RUNTIME_FREQ` instead.

SERIES_BY_LEAD_GROUP_FCSTS
.. warning:: **DEPRECATED:** Please use :term:`SERIES_ANALYSIS_GROUP_FCSTS` instead.
.. warning:: **DEPRECATED:** Please use :term:`LEAD_SEQ_\<n>` and :term:`SERIES_ANALYSIS_RUNTIME_FREQ` instead.

SERIES_INIT_FILTERED_OUT_DIR
.. warning:: **DEPRECATED:** Please use :term:`SERIES_ANALYSIS_FILTERED_OUTPUT_DIR` instead.
Expand Down
63 changes: 58 additions & 5 deletions docs/Users_Guide/systemconfiguration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -795,25 +795,78 @@ is equivalent to setting::
[config]
LEAD_SEQ = 0, 3, 6, 9, 12

.. _grouping_forecast_leads:

Grouping Forecast Leads
"""""""""""""""""""""""

Grouping forecast leads is possible as well using a special version of
the :term:`LEAD_SEQ` variable for the
**SeriesByLead Wrapper Only**.
If :term:`SERIES_BY_LEAD_GROUP_FCSTS` = True, then groups of
forecast leads can be defined to be evaluated together.
the :term:`LEAD_SEQ` variable.
If {APP_NAME}_RUNTIME_FREQ, e.g. SERIES_ANALYSIS_RUNTIME_FREQ, is set to
**RUN_ONCE_PER_INIT_OR_VALID**,
then groups of forecast leads can be defined to be evaluated together.
Any number of these groups can be defined by setting
configuration variables LEAD_SEQ_1, LEAD_SEQ_2, ..., :term:`LEAD_SEQ_\<n\>`.
The value can be defined with a
comma-separated list of integers (currently only hours are supported here)
or using :ref:`begin_end_incr`. Each :term:`LEAD_SEQ_\<n\>` must have a
corresponding variable :term:`LEAD_SEQ_<n>_LABEL`. For example::


[config]
LEAD_SEQ_1 = 0, 6, 12, 18
LEAD_SEQ_1_LABEL = Day1
LEAD_SEQ_2 = begin_end_incr(24,42,6)
LEAD_SEQ_2_LABEL = Day2

In this example, the label **Day1** will be used for 0, 6, 12, 18 and
the label **Day2** will be used for 24, 30, 36, 42.

Forecast leads can also be grouped by defining a single list of forecast leads
with :term:`LEAD_SEQ`, then specifying the size of each group using
:term:`LEAD_SEQ_GROUP_SIZE`. For example::

[config]
LEAD_SEQ = 0, 12, 24, 36
LEAD_SEQ_GROUP_SIZE = 1d

This configuration will create groups of forecast leads that each contain 1 day.
This is the equivalent of setting::

[config]
LEAD_SEQ_1 = 0, 12
LEAD_SEQ_2 = 24, 36

Each group will be labeled Group<INDEX> where <INDEX> is the group number.
In this example, the label **Group1** will be used for 0, 12 and
the label **Group2** will be used for 24, 36.
The label can be referenced in filename templates using {label}.

To change the text "Group" to something else, set :term:`LEAD_SEQ_GROUP_LABEL`.
Setting::

LEAD_SEQ_GROUP_LABEL = Day

will label the groups **Day1** and **Day2**.

:term:`LEAD_SEQ_<n>_LABEL` can also be used to change the label for a specific
group. From the previous example, setting::

LEAD_SEQ_2_LABEL = SecondDay

will label the groups **Day1** and **SecondDay**.

If the list of forecast leads contains a gap where there are no leads that fall
within a given group, that group will be skipped. For example::

[config]
LEAD_SEQ = 0, 12, 48, 60
LEAD_SEQ_GROUP_SIZE = 1d
LEAD_SEQ_GROUP_LABEL = Day

The label **Day1** will be used for 0, 12 and
the label **Day3** will be used for 48, 60.
Notice that a **Day2** label is not created.

:term:`INIT_SEQ`
""""""""""""""""

Expand Down
3 changes: 2 additions & 1 deletion internal/scripts/docker_env/scripts/metdataio_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ BASE_ENV=metplus_base.${METPLUS_VERSION}

mamba create -y --clone ${BASE_ENV} --name ${ENV_NAME}

mamba install -y --name ${ENV_NAME} -c conda-forge lxml==4.9.1 pymysql==1.0.2 pandas==1.5.1
#mamba install -y --name ${ENV_NAME} -c conda-forge lxml==4.9.1 pymysql==1.0.2 pandas==1.5.1
mamba install -y --name ${ENV_NAME} -c conda-forge pymysql==1.1.0 pyyaml==6.0 "xarray>=2023.1.0" lxml==4.9.1 netcdf4==1.6.2
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,9 @@ def test_is_var_item_valid_levels(metplus_config, item_list, configs_to_set, is_
@pytest.mark.parametrize(
'met_config_file, expected',
[
('GridStatConfig_good', (True, [])),
('GridStatConfig_bad', (False, [])),
('GridStatConfig_fake', (False, [])),
('GridStatConfig_good', True),
('GridStatConfig_bad', False),
('GridStatConfig_fake', False),
]
)
@pytest.mark.util
Expand Down
23 changes: 0 additions & 23 deletions internal/tests/pytests/util/system_util/test_system_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,29 +228,6 @@ def test_write_list_to_file(tmp_path_factory):
assert actual == '\n'.join(output_list) + '\n'


@pytest.mark.util
def test_prune_empty(tmp_path_factory):
prune_dir = tmp_path_factory.mktemp('prune')

dir1 = prune_dir / 'empty_file_dir'
dir2 = prune_dir / 'not_empty_file_dir'
dir3 = prune_dir / 'empty_dir'
for d in [dir1, dir2, dir3]:
os.makedirs(d)

# make two files, one empty one not.
open(os.path.join(dir1, 'empty.txt'), 'a').close()
file_with_content = os.path.join(dir2, 'not_empty.txt')
with open(file_with_content, 'w') as f:
f.write('Fee fi fo fum.')

su.prune_empty(prune_dir, mock.Mock())

assert not os.path.exists(dir1)
assert os.path.exists(file_with_content)
assert not os.path.exists(dir3)


@pytest.mark.parametrize(
'regex, expected', [
(r'\d', ['bigfoot/123.txt', 'yeti/234.txt']),
Expand Down
Loading