From d917ace72c688549a644e407e6588cab7eed0e68 Mon Sep 17 00:00:00 2001
From: Michael Kavulich
Date: Mon, 20 Mar 2023 07:23:25 -0600
Subject: [PATCH] [develop] Replace shell-based WE2E scripts with python versions (#637)

This PR improves on the new ./run_WE2E_tests.py script (introduced in #558), implementing all the features present in the previous shell-based workflow. Some new files are also introduced for better organization and additional functionality:

 * tests/WE2E/utils.py
   A collection of functions used by the other scripts, contained here to avoid circular dependencies.
 * tests/WE2E/WE2E_summary.py
   Given an experiment directory or .yaml file, prints to screen a summary of each experiment, its status, and the number of core hours used. It also writes a summary file with detailed information about each task for each experiment.
 * tests/WE2E/print_test_info.py
   Prints a file WE2E_test_info.txt, very similar to the legacy WE2E_test_info.csv except for a few minor format differences.

Any of these scripts can be run with the -h argument to print information about all available options (excluding utils.py, which is not designed to be run stand-alone).

With this PR, the superseded shell-based tools are removed.
---
 .cicd/Jenkinsfile                             |    2 +-
 .cicd/scripts/srw_test.sh                     |   51 +-
 .gitignore                                    |    4 +-
 docs/UsersGuide/source/ConfigWorkflow.rst     |    2 +-
 docs/UsersGuide/source/WE2Etests.rst          |  338 ++--
 tests/WE2E/WE2E_summary.py                    |   71 +
 .../{monitor_jobs.yaml => WE2E_tests.yaml}    |    0
 tests/WE2E/create_WE2E_resource_summary.py    |  187 --
 .../WE2E/get_WE2Etest_names_subdirs_descs.sh  | 1633 -----------------
 tests/WE2E/get_expts_status.sh                |  475 -----
 tests/WE2E/monitor_jobs.py                    |  226 +--
 tests/WE2E/print_test_info.py                 |   26 +
 tests/WE2E/run_WE2E_tests.py                  |  296 +--
 tests/WE2E/run_WE2E_tests.sh                  | 1379 --------------
 tests/WE2E/setup_WE2E_tests.sh                |   57 +-
 ...et_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml |    4 +-
 .../config.specify_template_filenames.yaml    |    6 +-
 tests/WE2E/utils.py                           |  568 ++++++
 ush/calculate_cost.py                         |  109 +-
 ush/python_utils/config_parser.py             |   10 +
 20 files changed, 1151 insertions(+), 4293 deletions(-)
 create mode 100755 tests/WE2E/WE2E_summary.py
 rename tests/WE2E/{monitor_jobs.yaml => WE2E_tests.yaml} (100%)
 delete mode 100644 tests/WE2E/create_WE2E_resource_summary.py
 delete mode 100755 tests/WE2E/get_WE2Etest_names_subdirs_descs.sh
 delete mode 100755 tests/WE2E/get_expts_status.sh
 create mode 100755 tests/WE2E/print_test_info.py
 delete mode 100755 tests/WE2E/run_WE2E_tests.sh
 create mode 100755 tests/WE2E/utils.py

diff --git a/.cicd/Jenkinsfile b/.cicd/Jenkinsfile
index 92d01de481..c2d054d719 100644
--- a/.cicd/Jenkinsfile
+++ b/.cicd/Jenkinsfile
@@ -177,7 +177,7 @@ pipeline {
     post {
         always {
             // Archive the test log files
-            sh 'cd "${SRW_WE2E_EXPERIMENT_BASE_DIR}" && tar --create --gzip --verbose --dereference --file "${WORKSPACE}/we2e_test_logs-${SRW_PLATFORM}-${SRW_COMPILER}.tgz" */log.generate_FV3LAM_wflow */log.launch_FV3LAM_wflow */log/*'
+            sh 'cd "${SRW_WE2E_EXPERIMENT_BASE_DIR}" && tar --create --gzip --verbose --dereference --file "${WORKSPACE}/we2e_test_logs-${SRW_PLATFORM}-${SRW_COMPILER}.tgz" */log.generate_FV3LAM_wflow */log/* ${WORKSPACE}/tests/WE2E/WE2E_tests_*yaml ${WORKSPACE}/tests/WE2E/WE2E_summary*txt ${WORKSPACE}/tests/WE2E/log.*'
             // Remove the data sets from the experiments directory to conserve disk space
             sh 'find "${SRW_WE2E_EXPERIMENT_BASE_DIR}" -regextype posix-extended -regex "^.*(orog|[0-9]{10})$" -type d | xargs rm -rf'
             s3Upload consoleLogLevel: 'INFO',
dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: 'we2e_test_results-*-*.txt', storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: 'we2e_test_logs-*-*.tgz', storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: [] diff --git a/.cicd/scripts/srw_test.sh b/.cicd/scripts/srw_test.sh index 8df2ff5c2f..8c6ef42528 100755 --- a/.cicd/scripts/srw_test.sh +++ b/.cicd/scripts/srw_test.sh @@ -38,58 +38,15 @@ else fi cd ${we2e_test_dir} -./setup_WE2E_tests.sh ${platform} ${SRW_PROJECT} ${SRW_COMPILER} ${test_type} \ - expt_basedir=${we2e_experiment_base_dir} \ - opsroot=${nco_dir} - -# Run the new run_srw_tests script if the machine is Cheyenne. -if [[ "${platform}" = "cheyenne" ]]; then - cd ${workspace}/ush - ./run_srw_tests.py -e=${we2e_experiment_base_dir} - cd ${we2e_test_dir} -fi - # Progress file progress_file="${workspace}/we2e_test_results-${platform}-${SRW_COMPILER}.txt" - -# Allow the tests to start before checking for status. -# TODO: Create a parameter that sets the initial start delay. -if [[ "${platform}" != "cheyenne" ]]; then - sleep 300 -fi - -# Wait for all tests to complete. -while true; do - - # Check status of all experiments - ./get_expts_status.sh expts_basedir="${we2e_experiment_base_dir}" \ - verbose="FALSE" | tee ${progress_file} - - # Exit loop only if there are not tests in progress - set +e - grep -q "Workflow status: IN PROGRESS" ${progress_file} - exit_code=$? - set -e - - if [[ $exit_code -ne 0 ]]; then - break - fi - - # TODO: Create a paremeter that sets the poll frequency. - sleep 60 -done - -# Allow we2e cron jobs time to complete and clean up themselves -# TODO: Create parameter that sets the interval for the we2e cron jobs; this -# value should be some factor of that interval to ensure the cron jobs execute -# before the workspace is cleaned up. 
-if [[ "${platform}" != "cheyenne" ]]; then - sleep 600 -fi +./setup_WE2E_tests.sh ${platform} ${SRW_PROJECT} ${SRW_COMPILER} ${test_type} \ + --expt_basedir=${we2e_experiment_base_dir} \ + --opsroot=${nco_dir} | tee ${progress_file} # Set exit code to number of failures set +e -failures=$(grep "Workflow status: FAILURE" ${progress_file} | wc -l) +failures=$(grep " DEAD " ${progress_file} | wc -l) if [[ $failures -ne 0 ]]; then failures=1 fi diff --git a/.gitignore b/.gitignore index 6d4734c975..bc3eee8545 100644 --- a/.gitignore +++ b/.gitignore @@ -7,12 +7,14 @@ lib/ share/ modulefiles/extrn_comp_build/ sorc/*/ -tests/WE2E/WE2E_test_info.csv +tests/WE2E/WE2E_tests_*.yaml tests/WE2E/*.txt tests/WE2E/*.log +tests/WE2E/log.* ush/__pycache__/ ush/config.yaml ush/python_utils/__pycache__/ ush/*.swp *.swp +__pycache__ diff --git a/docs/UsersGuide/source/ConfigWorkflow.rst b/docs/UsersGuide/source/ConfigWorkflow.rst index 6d03dc0fdc..05de68b98b 100644 --- a/docs/UsersGuide/source/ConfigWorkflow.rst +++ b/docs/UsersGuide/source/ConfigWorkflow.rst @@ -174,7 +174,7 @@ METplus Parameters Test Directories ---------------------- -These directories are used only by the ``run_WE2E_tests.sh`` script, so they are not used unless the user runs a Workflow End-to-End (WE2E) test. Their function corresponds to the same variables without the ``TEST_`` prefix. Users typically should not modify these variables. For any alterations, the logic in the ``run_WE2E_tests.sh`` script would need to be adjusted accordingly. +These directories are used only by the ``run_WE2E_tests.py`` script, so they are not used unless the user runs a Workflow End-to-End (WE2E) test (see :numref:`Chapter %s `). Their function corresponds to the same variables without the ``TEST_`` prefix. Users typically should not modify these variables. For any alterations, the logic in the ``run_WE2E_tests.py`` script would need to be adjusted accordingly. ``TEST_EXTRN_MDL_SOURCE_BASEDIR``: (Default: "") This parameter allows testing of user-staged files in a known location on a given platform. This path contains a limited dataset and likely will not be useful for most user experiments. diff --git a/docs/UsersGuide/source/WE2Etests.rst b/docs/UsersGuide/source/WE2Etests.rst index 4495357934..ef5a7ead34 100644 --- a/docs/UsersGuide/source/WE2Etests.rst +++ b/docs/UsersGuide/source/WE2Etests.rst @@ -3,7 +3,7 @@ ================================== Workflow End-to-End (WE2E) Tests ================================== -The SRW App contains a set of end-to-end tests that exercise various workflow configurations of the SRW App. These are referred to as workflow end-to-end (WE2E) tests because they all use the Rocoto workflow manager to run their individual workflows. The purpose of these tests is to ensure that new changes to the App do not break existing functionality and capabilities. +The SRW App contains a set of end-to-end tests that exercise various workflow configurations of the SRW App. These are referred to as workflow end-to-end (WE2E) tests because they all use the Rocoto workflow manager to run their individual workflows from start to finish. The purpose of these tests is to ensure that new changes to the App do not break existing functionality and capabilities. Note that the WE2E tests are not regression tests---they do not check whether current results are identical to previously established baselines. 
They also do @@ -34,13 +34,13 @@ The test configuration files for these categories are located in the following d ufs-srweather-app/tests/WE2E/test_configs/grids_extrn_mdls_suites_nco ufs-srweather-app/tests/WE2E/test_configs/wflow_features -The script to run the WE2E tests is named ``run_WE2E_tests.sh`` and is located in the directory ``ufs-srweather-app/tests/WE2E``. Each WE2E test has an associated configuration file named ``config.${test_name}.yaml``, where ``${test_name}`` is the name of the corresponding test. These configuration files are subsets of the full range of ``config.yaml`` experiment configuration options. (See :numref:`Chapter %s ` for all configurable options and :numref:`Section %s ` for information on configuring ``config.yaml``.) For each test, the ``run_WE2E_tests.sh`` script reads in the test configuration file and generates from it a complete ``config.yaml`` file. It then calls ``generate_FV3LAM_wflow.py``, which in turn reads in ``config.yaml`` and generates a new experiment for the test. The name of each experiment directory is set to that of the corresponding test, and a copy of ``config.yaml`` for each test is placed in its experiment directory. +The script to run the WE2E tests is named ``run_WE2E_tests.py`` and is located in the directory ``ufs-srweather-app/tests/WE2E``. Each WE2E test has an associated configuration file named ``config.${test_name}.yaml``, where ``${test_name}`` is the name of the corresponding test. These configuration files are subsets of the full range of ``config.yaml`` experiment configuration options. (See :numref:`Chapter %s ` for all configurable options and :numref:`Section %s ` for information on configuring ``config.yaml``.) For each test, the ``run_WE2E_tests.py`` script reads in the test configuration file and generates from it a complete ``config.yaml`` file. It then calls the ``generate_FV3LAM_wflow()`` function, which in turn reads in ``config.yaml`` and generates a new experiment for the test. The name of each experiment directory is set to that of the corresponding test, and a copy of ``config.yaml`` for each test is placed in its experiment directory. -Since ``run_WE2E_tests.sh`` calls ``generate_FV3LAM_wflow.py`` for each test, the -Python modules required for experiment generation must be loaded before ``run_WE2E_tests.sh`` +As with any other experiment within the App, the +Python modules required for experiment generation must be loaded before ``run_WE2E_tests.py`` can be called. See :numref:`Section %s ` for information on loading the Python -environment on supported platforms. Note also that ``run_WE2E_tests.sh`` assumes that all of -the executables have been built (see :numref:`Section %s `). If they have not, then ``run_WE2E_tests.sh`` will still generate the experiment directories, but the workflows will fail. +environment on supported platforms. Note also that ``run_WE2E_tests.py`` assumes that all of +the executables have been built (see :numref:`Section %s `). If they have not, then ``run_WE2E_tests.py`` will still generate the experiment directories, but the workflows will fail. Supported Tests =================== @@ -50,24 +50,111 @@ The full list of WE2E tests is extensive; it is not recommended to run all the t Running the WE2E Tests ================================ -Users may specify the set of tests to run by creating a text file, such as ``my_tests.txt``, which contains a list of the WE2E tests to run (one per line). Then, they pass the name of that file to ``run_WE2E_tests.sh``. 
For example, to run the tests ``custom_ESGgrid`` and ``grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16`` (from the ``wflow_features`` and ``grids_extrn_mdls_suites_community`` categories, respectively), users would enter the following commands from the ``WE2E`` working directory (``ufs-srweather-app/tests/WE2E/``):
+Users may specify the set of tests to run in one of three ways. First, users can pass the name of a single test or a list of tests to the script. Second, they can pass an option to run the ``fundamental`` or ``comprehensive`` suite of tests, or ``all`` tests (not recommended). Finally, users can create a text file, such as ``my_tests.txt``, which contains a list of the WE2E tests to run (one per line). Any one of these options can be passed to the ``run_WE2E_tests.py`` script via the ``--tests`` or ``-t`` option.
+
+For example, to run the tests ``custom_ESGgrid`` and ``grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16`` (from the ``wflow_features`` and ``grids_extrn_mdls_suites_community`` categories, respectively), users would enter the following commands from the ``WE2E`` working directory (``ufs-srweather-app/tests/WE2E/``):
+
+.. code-block:: console
+
+    echo "custom_ESGgrid" > my_tests.txt
+    echo "grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16" >> my_tests.txt
+
+For each specified test, ``run_WE2E_tests.py`` will generate a new experiment directory and, by default, call a second function, ``monitor_jobs()``, that will continuously monitor active jobs, submit new jobs, and track the success or failure status of the experiment in a ``.yaml`` file. Finally, when all jobs have finished running (successfully or not), the function ``print_WE2E_summary()`` will print a summary of the jobs to screen, including each job's success or failure, timing information, and (if on an appropriately configured platform) the number of core hours used. An example run would look like this:
+
+.. code-block:: console
+
+    $ ./run_WE2E_tests.py -t custom_ESGgrid -m hera -a gsd-fv3 --expt_basedir "test_set_01" -q
+    Checking that all tests are valid
+    Will run 1 tests:
+    /user/home/ufs-srweather-app/tests/WE2E/test_configs/wflow_features/config.custom_ESGgrid.yaml
+    Calling workflow generation function for test custom_ESGgrid
+
+    Workflow for test custom_ESGgrid successfully generated in
+    /user/home/expt_dirs/test_set_01/custom_ESGgrid
+
+    calling function that monitors jobs, prints summary
+    Writing information for all experiments to WE2E_tests_20230302214843.yaml
+    Checking tests available for monitoring...
+    Starting experiment custom_ESGgrid running
+    Updating database for experiment custom_ESGgrid
+    Setup complete; monitoring 1 experiments
+    Use ctrl-c to pause job submission/monitoring
+    Experiment custom_ESGgrid is COMPLETE; will no longer monitor.
+    All 1 experiments finished in 0:13:50.851855
+    Calculating core-hour usage and printing final summary
+    ----------------------------------------------------------------------------------------------------
+    Experiment name                                            | Status    | Core hours used
+    ----------------------------------------------------------------------------------------------------
+    custom_ESGgrid                                               COMPLETE              35.92
+    ----------------------------------------------------------------------------------------------------
+    Total                                                        COMPLETE              35.92
+
+    Detailed summary written to WE2E_summary_20230302220233.txt
+
+    All experiments are complete
+    Summary of results available in WE2E_tests_20230302214843.yaml
+
+
+As the script runs, detailed debug output is written to the file ``log.run_WE2E_tests``. This can be useful for debugging if something goes wrong. You can also use the ``-d`` flag to print all this output to screen during the run, but this can get quite cluttered.
+
+The final job summary is written by the ``print_WE2E_summary()`` function; this prints a short summary of experiments to screen and writes a more detailed summary of all jobs for all experiments to the indicated ``.txt`` file.
+
+.. code-block:: console
+
+    $ cat WE2E_summary_20230302220233.txt
+    ----------------------------------------------------------------------------------------------------
+    Experiment name                                            | Status    | Core hours used
+    ----------------------------------------------------------------------------------------------------
+    custom_ESGgrid                                               COMPLETE              35.92
+    ----------------------------------------------------------------------------------------------------
+    Total                                                        COMPLETE              35.92
+
+    Detailed summary of each experiment:
+
+    ----------------------------------------------------------------------------------------------------
+    Detailed summary of experiment custom_ESGgrid
+                                                         | Status    | Walltime   | Core hours used
+    ----------------------------------------------------------------------------------------------------
+    make_grid_201907010000                                 SUCCEEDED       12.0              0.13
+    get_extrn_ics_201907010000                             SUCCEEDED        7.0              0.08
+    get_extrn_lbcs_201907010000                            SUCCEEDED        6.0              0.07
+    make_orog_201907010000                                 SUCCEEDED       62.0              0.69
+    make_sfc_climo_201907010000                            SUCCEEDED       41.0              0.91
+    make_ics_201907010000                                  SUCCEEDED      180.0              8.00
+    make_lbcs_201907010000                                 SUCCEEDED      228.0             10.13
+    run_fcst_201907010000                                  SUCCEEDED      208.0             13.87
+    run_post_f000_201907010000                             SUCCEEDED       15.0              0.33
+    run_post_f001_201907010000                             SUCCEEDED       15.0              0.33
+    run_post_f002_201907010000                             SUCCEEDED       15.0              0.33
+    run_post_f003_201907010000                             SUCCEEDED       12.0              0.27
+    run_post_f004_201907010000                             SUCCEEDED       12.0              0.27
+    run_post_f005_201907010000                             SUCCEEDED       11.0              0.24
+    run_post_f006_201907010000                             SUCCEEDED       12.0              0.27
+    ----------------------------------------------------------------------------------------------------
+    Total                                                  COMPLETE                        35.92
+
+
+You may have noticed the line in the run output above that reads "Use ctrl-c to pause job submission/monitoring". The ``monitor_jobs()`` function (called automatically after all experiments are generated) is designed to be easily paused and restarted if necessary. If you wish to stop actively submitting jobs, simply quit the script with "ctrl-c"; the function will stop and print a short message explaining how to continue the experiment.

.. code-block:: console

-    cat > my_tests.txt
-    custom_ESGgrid
-    grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+    Setup complete; monitoring 1 experiments
+    Use ctrl-c to pause job submission/monitoring
+    ^C
+
+
+    User interrupted monitor script; to resume monitoring jobs run:

-(and ``Ctrl + D`` to exit). For each test in ``my_tests.txt``, ``run_WE2E_tests.sh`` will generate a new experiment directory and, by default, create a new :term:`cron` job in the user's cron table that will (re)launch the workflow every 2 minutes. This cron job calls the workflow launch script (``launch_FV3LAM_wflow.sh``) until the workflow either completes successfully (i.e., all tasks SUCCEEDED) or fails (i.e., at least one task fails). The cron job is then removed from the user's cron table.
+    ./monitor_jobs.py -y=WE2E_tests_20230302214324.yaml -p=1

-The examples below demonstrate several common ways that ``run_WE2E_tests.sh`` can be called with the ``my_tests.txt`` file above. These examples assume that the user has already built the SRW App and loaded the regional workflow as described in :numref:`Section %s `.
+The full list of options for any of these scripts can be found by using the ``-h`` flag. The examples below demonstrate several of the more common options for ``run_WE2E_tests.py``. These examples (as well as those above) assume that the user has already built the SRW App and loaded the appropriate Python environment as described in :numref:`Section %s `.

 #. To run the tests listed in ``my_tests.txt`` on Hera and charge the computational
-   resources used to the "rtrr" account, use:
+   resources used to the "rtrr" account:

    .. code-block::

-      ./run_WE2E_tests.sh tests_file="my_tests.txt" machine="hera" account="rtrr"
+      ./run_WE2E_tests.py --tests=my_tests.txt --machine=hera --account=rtrr

    This will create the experiment subdirectories for the two sample WE2E tests in the directory ``${HOMEdir}/../expt_dirs``, where ``HOMEdir`` is the top-level directory for the ufs-srweather-app repository (usually set to something like ``/path/to/ufs-srweather-app``). Thus, the following two experiment directories will be created:

@@ -76,85 +163,106 @@ The examples below demonstrate several common ways that ``run_WE2E_tests.sh`` ca
      ${HOMEdir}/../expt_dirs/custom_ESGgrid
      ${HOMEdir}/../expt_dirs/grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16

-   In addition, by default, cron jobs will be added to the user's cron table to relaunch the workflows of these experiments every 2 minutes.
+   Once these experiment directories are created, the script will call the ``monitor_jobs()`` function. This function runs ``rocotorun`` in the background to monitor the status of jobs in each experiment directory, tracking the status of jobs as they run and complete, and submitting new jobs when they are ready. The progress of ``monitor_jobs()`` is tracked in a file ``WE2E_tests_{datetime}.yaml``, where ``{datetime}`` is the date and time (in ``yyyymmddhhmmss`` format) that the file was created.

-#. To change the frequency with which the cron relaunch jobs are submitted
-   from the default of 2 minutes to 1 minute, use:
+#. Our second example will run the fundamental suite of tests on Hera, charging computational resources to the "gsd-fv3" account, and placing the experiment subdirectories in a subdirectory named ``test_set_01``:

   .. code-block::

-      ./run_WE2E_tests.sh tests_file="my_tests.txt" machine="hera" account="rtrr" cron_relaunch_intvl_mnts="01"
+      ./run_WE2E_tests.py -t fundamental -m hera -a gsd-fv3 --expt_basedir "test_set_01" -q

-#. To disable use of cron (which implies that the worfkow for each test will have to be relaunched manually from within each experiment directory), use:
+   In this case, the full paths to the experiment directories will be:

    .. code-block::

-      ./run_WE2E_tests.sh tests_file="my_tests.txt" machine="hera" account="rtrr" use_cron_to_relaunch="FALSE"
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_RAP_suite_HRRR
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_v15p2
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_HRRR
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_RRFS_v1beta
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_HRRR
+      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
+      ${HOMEdir}/../expt_dirs/test_set_01/nco_grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_HRRR

-   In this case, the user will have to go into each test's experiment directory and either manually run the ``launch_FV3LAM_wflow.sh`` script or use the Rocoto commands described in :numref:`Chapter %s ` to (re)launch the workflow. Note that if using the Rocoto commands directly, the log file ``log.launch_FV3LAM_wflow`` will not be created; in this case, the status of the workflow can be checked using the ``rocotostat`` command (see :numref:`Section %s ` or :numref:`Section %s `).
+   The ``--expt_basedir`` option is useful for grouping various sets of tests. It can also be given a full path as an argument, which will place experiments in the given location.

-#. To place the experiment subdirectories in a subdirectory named ``test_set_01`` under
-   ``${HOMEdir}/../expt_dirs`` (instead of immediately under ``expt_dirs``), use:
+   The ``-q`` flag (as used in the first example above) is helpful for keeping the screen less cluttered; it suppresses the output from ``generate_FV3LAM_wflow()``, only printing important messages (warnings and errors) to screen. As always, this output will still be available in the ``log.run_WE2E_tests`` file.

-   .. code-block::
+#. By default, the job monitoring and submission process is serial, using a single task. For test suites that contain many experiments, this means that the script may take a long time to return to a given experiment and submit the next job, due to the amount of time it takes for the ``rocotorun`` command to complete. In order to speed this process up, provided you have access to a node with the appropriate availability (e.g., submitting from a compute node), you can run the job monitoring processes in parallel using the ``-p`` option:

-      ./run_WE2E_tests.sh tests_file="my_tests.txt" machine="hera" account="rtrr" expt_basedir="test_set_01"
+   .. code-block::

-   In this case, the full paths to the experiment directories will be:
+      ./run_WE2E_tests.py -m=jet -a=gsd-fv3-dev -t=all -q -p 6

-   .. code-block::
+   Depending on your machine settings, this can reduce the time it takes to run all experiments substantially.
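+   To give a sense of how such parallel polling can be structured, below is a minimal, hypothetical sketch using only the Python standard library. The ``poll_experiment()`` helper and the experiment paths are invented for illustration; the actual logic lives in ``monitor_jobs.py`` and ``utils.py``:
+
+   .. code-block:: python
+
+      import subprocess
+      from multiprocessing import Pool
+
+      def poll_experiment(expt_dir: str) -> str:
+          """Invoke rocotorun once for a single experiment directory (hypothetical helper)."""
+          subprocess.run(["rocotorun", "-w", f"{expt_dir}/FV3LAM_wflow.xml",
+                          "-d", f"{expt_dir}/FV3LAM_wflow.db"], check=False)
+          return expt_dir
+
+      if __name__ == "__main__":
+          # With more than one process, several experiments are polled at once
+          # rather than one after another, as in the default serial case.
+          expt_dirs = ["/path/to/expt_dirs/test1", "/path/to/expt_dirs/test2"]
+          with Pool(processes=2) as pool:
+              pool.map(poll_experiment, expt_dirs)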
-      ${HOMEdir}/../expt_dirs/test_set_01/custom_ESGgrid
-      ${HOMEdir}/../expt_dirs/test_set_01/grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+#. This example will run the single experiment "custom_ESGgrid" on Hera, charging computational resources to the "fv3lam" account. For this example, we submit the test using the legacy :term:`cron`-based system:

-   This is useful for grouping various sets of tests.
+.. note::

-#. To use a test list file (again named ``my_tests.txt``) located in a custom location instead of in the same directory as ``run_WE2E_tests.sh`` and to have the experiment directories be placed in a specific, non-default location (e.g., ``/path/to/custom/expt_dirs``), use:
+   This option is not recommended, as it does not work on some machines and can cause system bottlenecks on others.

    .. code-block::

-      ./run_WE2E_tests.sh tests_file="/path/to/custom/location/my_tests.txt" machine="hera" account="rtrr" expt_basedir="/path/to/custom/expt_dirs"
+      ./run_WE2E_tests.py -t=custom_ESGgrid -m=hera -a=fv3lam --use_cron_to_relaunch --cron_relaunch_intvl_mnts=1

-The full usage statement for ``run_WE2E_tests.sh`` is as follows:
-
-.. code-block::
+The option ``--use_cron_to_relaunch`` means that, rather than calling the ``monitor_jobs()`` function, the ``generate_FV3LAM_wflow()`` function will create a new :term:`cron` job in the user's cron table that will launch the experiment with the workflow launch script (``launch_FV3LAM_wflow.sh``). By default this script runs every 2 minutes, but here we have changed the interval to 1 minute with the ``--cron_relaunch_intvl_mnts=1`` argument. The script will continue to be relaunched until the workflow either completes successfully (i.e., all tasks SUCCEEDED) or fails (i.e., at least one task fails); at that point, the cron job is removed from the user's cron table.

-   ./run_WE2E_tests.sh \
-     tests_file="..." \
-     machine="..." \
-     account="..." \
-     [expt_basedir="..."] \
-     [exec_subdir="..."] \
-     [use_cron_to_relaunch="..."] \
-     [cron_relaunch_intvl_mnts="..."] \
-     [verbose="..."] \
-     [generate_csv_file="..."] \
-     [machine_file="..."] \
-     [stmp="..."] \
-     [ptmp="..."] \
-     [compiler="..."] \
-     [build_env_fn="..."]
-
-The arguments in brackets are optional. A complete description of these arguments can be
-obtained by issuing:

-.. code-block::
+Checking Test Status and Summary
+=================================
+By default, ``./run_WE2E_tests.py`` will actively monitor jobs, print a message to screen when each job completes (either successfully or with a failure), and print a summary file ``WE2E_summary_{datetime}.txt``, where ``{datetime}`` is the date and time (in ``yyyymmddhhmmss`` format) that the file was created.
+However, if the user is using the legacy crontab option, or would like to summarize one or more experiments that are either not complete or were not handled by the WE2E test scripts, this status/summary file can be generated manually using ``WE2E_summary.py``.
+In this example, an experiment was generated using the crontab option, and has not yet finished running.
+We use the ``-e`` option to point to the experiment directory and get the current status of the experiment:

-   ./run_WE2E_tests.sh --help
+  .. code-block::

-from within the ``ufs-srweather-app/tests/WE2E`` directory.
+    ./WE2E_summary.py -e /user/home/PR_466/expt_dirs/
+    Updating database for experiment grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_RRFS_v1beta
+    Updating database for experiment grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_v16
+    Updating database for experiment grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR
+    Updating database for experiment specify_template_filenames
+    Updating database for experiment grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_HRRR
+    Updating database for experiment grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
+    Updating database for experiment grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_2017_gfdlmp_regional
+    Updating database for experiment grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_RAP_suite_HRRR
+    Updating database for experiment grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+    Updating database for experiment grid_RRFS_SUBCONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+    Updating database for experiment specify_DOT_OR_USCORE
+    Updating database for experiment custom_GFDLgrid__GFDLgrid_USE_NUM_CELLS_IN_FILENAMES_eq_FALSE
+    Updating database for experiment grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+    ----------------------------------------------------------------------------------------------------
+    Experiment name                                              | Status    | Core hours used
+    ----------------------------------------------------------------------------------------------------
+    grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_RRFS_v1   COMPLETE             49.72
+    grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_v16      DYING                 6.51
+    grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR          COMPLETE            411.84
+    specify_template_filenames                                     COMPLETE             17.36
+    grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_HRRR       COMPLETE             16.03
+    grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1be   COMPLETE            318.55
+    grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_2017_g   COMPLETE             17.79
+    grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_RAP_suite_HRRR             COMPLETE             17.76
+    grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16       RUNNING               0.00
+    grid_RRFS_SUBCONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16    RUNNING               0.00
+    specify_DOT_OR_USCORE                                          QUEUED                0.00
+    custom_GFDLgrid__GFDLgrid_USE_NUM_CELLS_IN_FILENAMES_eq_FALS   QUEUED                0.00
+    grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS   QUEUED                0.00
+    ----------------------------------------------------------------------------------------------------
+    Total                                                          RUNNING             855.56
+
+    Detailed summary written to WE2E_summary_20230306173013.txt
+
+As with all Python scripts in the App, additional options for this script can be viewed by calling it with the ``-h`` argument.

.. _WE2ETestInfoFile:

-The WE2E Test Information File
-================================
-In addition to creating the WE2E tests' experiment directories and optionally creating
-cron jobs to launch their workflows, the ``run_WE2E_tests.sh`` script generates a CSV (Comma-Separated Value) file named ``WE2E_test_info.csv`` that contains information
-on the full set of WE2E tests. This file serves as a single location where relevant
-information about the WE2E tests can be found. It can be imported into Google Sheets
-using the "|" (pipe symbol) character as the custom field separator. If the user does *not* want ``run_WE2E_tests.sh`` to generate this CSV file the first time it runs,
-this functionality can be explicitly disabled by including the ``generate_csv_file="FALSE"`` flag as an argument when running this script.
+WE2E Test Information File +================================== + +If the user wants to see consolidated test information, they can generate a file that can be imported into a spreadsheet program (Google Sheets, Microsoft Excel, etc.) that summarizes each test. This file, named ``WE2E_test_info.txt`` by default, is delimited by the ``|`` character, and can be created either by running the ``./print_test_info.py`` script, or by generating an experiment using ``./run_WE2E_tests.py`` with the ``--print_test_info`` flag. The rows of the file/sheet represent the full set of available tests (not just the ones to be run). The columns contain the following information (column titles are included in the CSV file): @@ -225,80 +333,6 @@ The rows of the file/sheet represent the full set of available tests (not just t | ``LBC_SPEC_INTVL_HRS`` | ``NUM_ENS_MEMBERS`` -Additional fields (columns) may be added to the CSV file in the future. - -Note that the CSV file is not part of the ``ufs-srweather-app`` repository and therefore is not tracked by the repository. The ``run_WE2E_tests.sh`` script will generate a CSV file if the ``generate_csv_file`` flag to this script has *not* explicitly been set to false and if either one of the following is true: - -#. The CSV file doesn't already exist. -#. The CSV file does exist, but changes have been made to one or more of the - category subdirectories (e.g., test configuration files modified, added, - or deleted) since the creation of the CSV file. - -Thus, unless the ``generate_csv_file`` flag is set to ``"FALSE"``, the -``run_WE2E_tests.sh`` will create a CSV file the first time it is run in a -fresh git clone of the SRW App. The ``generate_csv_file`` flag is provided -because the CSV file generation can be slow, so users may wish to skip this -step since it is not a necessary part of running the tests. - - -Checking Test Status -====================== -If :term:`cron` jobs are used to periodically relaunch the tests, the status of each test can be checked by viewing the end of the log file (``log.launch_FV3LAM_wflow``). Otherwise (or alternatively), the ``rocotorun``/``rocotostat`` combination of commands can be used. (See :numref:`Section %s ` for details.) - -The SRW App also provides the script ``get_expts_status.sh`` in the directory -``ufs-srweather-app/tests/WE2E``, which can be used to generate -a status summary for all tests in a given base directory. This script updates -the workflow status of each test by internally calling ``launch_FV3LAM_wflow.sh``. Then, it prints out the status of the various tests in the command prompt. It also creates -a status report file named ``expts_status_${create_date}.txt`` (where ``create_date`` -is a time stamp in ``YYYYMMDDHHmm`` format corresponding to the creation date/time -of the report) and places it in the experiment base directory. By default, this status file -contains the last 40 lines from the end of the ``log.launch_FV3LAM_wflow`` file. This number can be adjusted via the ``num_log_lines`` argument. These lines include the experiment status as well as the task status table generated by ``rocotostat`` so that, in case of failure, it is convenient to pinpoint the task that failed. -For details on the usage of ``get_expts_stats.sh``, issue the following command from the ``WE2E`` directory: - -.. code-block:: - - ./get_expts_status.sh --help - -Here is an example of how to call ``get_expts_status.sh`` from the ``WE2E`` directory: - -.. 
code-block:: console - - ./get_expts_status.sh expts_basedir=/path/to/expt_dirs/set01 - -The path for ``expts_basedir`` should be an absolute path. - -Here is an example of output from the ``get_expts_status.sh`` script: - -.. code-block:: console - - Checking for active experiment directories in the specified experiments - base directory (expts_basedir): - expts_basedir = "/path/to/expt_dirs/set01" - ... - - The number of active experiments found is: - num_expts = 2 - The list of experiments whose workflow status will be checked is: - 'custom_ESGgrid' - 'grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16' - - ====================================== - Checking workflow status of experiment "custom_ESGgrid" ... - Workflow status: SUCCESS - ====================================== - - ====================================== - Checking workflow status of experiment "grid_RRFS_CONUScompact_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16" ... - Workflow status: IN PROGRESS - ====================================== - - A status report has been created in: - expts_status_fp = "/path/to/expt_dirs/set01/expts_status_202204211440.txt" - - DONE. - -The "Workflow status" field of each test indicates the status of its workflow. -The values that this can take on are "SUCCESS", "FAILURE", and "IN PROGRESS". Modifying the WE2E System ============================ @@ -329,30 +363,6 @@ To add a new test named, e.g., ``new_test01``, to one of the existing test categ #. Edit the contents of ``config.new_test01.yaml`` by modifying existing experiment variable values and/or adding new variables such that the test runs with the intended configuration. -.. _AddNewCategory: - -Adding a New WE2E Test Category ------------------------------------ - -To create a new test category called, e.g., ``new_category``: - -#. In the directory ``ufs-srweather-app/tests/WE2E/test_configs``, create a new directory named ``new_category``. - -#. In the file ``get_WE2Etest_names_subdirs_descs.sh``, add the element ``"new_category"`` to the array ``category_subdirs``, which contains the list of categories/subdirectories in which to search for test configuration files. Thus, ``category_subdirs`` becomes: - - .. code-block:: console - - category_subdirs=( \ - "." \ - "grids_extrn_mdls_suites_community" \ - "grids_extrn_mdls_suites_nco" \ - "wflow_features" \ - "new_category" \ - ) - -New tests can now be added to ``new_category`` using the procedure described in :numref:`Section %s `. - - .. _CreateAltTestNames: Creating Alternate Names for a Test @@ -378,7 +388,7 @@ In this situation, the primary name for the test is ``grid_RRFS_CONUScompact_25k * A primary test can have more than one alternate test name (by having more than one symlink pointing to the test's configuration file). * The symlinks representing the alternate test names can be in the same or a different category directory. * The ``--relative`` flag makes the symlink relative (i.e., within/below the ``tests`` directory) so that it stays valid when copied to other locations. (Note, however, that this flag is platform-dependent and may not exist on some platforms.) - * To determine whether a test has one or more alternate names, a user can view the CSV file ``WE2E_test_info.csv`` generated by the ``run_WE2E_tests.sh`` script. Recall from :numref:`Section %s ` that column 1 of this CSV file contains the test's primary name (and its category) while column 2 contains any alternate names (and their categories). 
-   * With this primary/alternate test naming convention, a user can list either the primary test name or one of the alternate test names in the experiments list file (e.g., ``my_tests.txt``) read in by ``run_WE2E_tests.sh``. If more than one name is listed for the same test (e.g., the primary name and and an alternate name, two alternate names, etc.), ``run_WE2E_tests.sh`` will exit with a warning message and will **not** run any tests.
+   * To determine whether a test has one or more alternate names, a user can view the file ``WE2E_test_info.txt`` as described in :numref:`Section %s `.
+   * With this primary/alternate test naming convention via symbolic links, if more than one name is listed for the same test (e.g., the primary name and an alternate name, two alternate names, etc.), ``run_WE2E_tests.py`` will only run the test once.

diff --git a/tests/WE2E/WE2E_summary.py b/tests/WE2E/WE2E_summary.py
new file mode 100755
index 0000000000..de478a0f38
--- /dev/null
+++ b/tests/WE2E/WE2E_summary.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import logging
+
+sys.path.append("../../ush")
+
+from python_utils import load_config_file
+
+from check_python_version import check_python_version
+
+from utils import calculate_core_hours, create_expts_dict, print_WE2E_summary, write_monitor_file
+
+def setup_logging(debug: bool = False) -> None:
+    """
+    Sets up logging, printing high-priority (INFO and higher) messages to screen;
+    when debug mode is enabled, all messages (DEBUG and higher) are printed.
+    """
+    logging.getLogger().setLevel(logging.DEBUG)
+
+    console = logging.StreamHandler()
+    if debug:
+        console.setLevel(logging.DEBUG)
+    else:
+        console.setLevel(logging.INFO)
+    logging.getLogger().addHandler(console)
+    logging.debug("Logging set up successfully")
+
+
+
+if __name__ == "__main__":
+
+    check_python_version()
+
+    # Parse arguments
+    parser = argparse.ArgumentParser(
+        description="Script for creating a job summary printed to screen and a file, "\
+                    "either from a yaml experiment file created by monitor_jobs() or from a "\
+                    "provided directory of experiments\n")
+
+    req = parser.add_mutually_exclusive_group(required=True)
+    req.add_argument('-y', '--yaml_file', type=str,
+                     help='YAML-format file specifying the information of jobs to be summarized; '\
+                          'for an example file, see WE2E_tests.yaml')
+    req.add_argument('-e', '--expt_dir', type=str,
+                     help='The full path of an experiment directory, containing one or more '\
+                          'subdirectories with UFS SRW App experiments in them')
+    parser.add_argument('-d', '--debug', action='store_true',
+                        help='Script will be run in debug mode with more verbose output')
+
+    args = parser.parse_args()
+
+    setup_logging(args.debug)
+
+    yaml_file = args.yaml_file
+
+    # Set up dictionary of experiments
+    if args.expt_dir:
+        yaml_file, expts_dict = create_expts_dict(args.expt_dir)
+    elif args.yaml_file:
+        expts_dict = load_config_file(args.yaml_file)
+    else:
+        raise ValueError(f'Bad arguments; run {__file__} -h for more information')
+
+    # Calculate core hours and update yaml
+    expts_dict = calculate_core_hours(expts_dict)
+    write_monitor_file(yaml_file, expts_dict)
+
+    # Call function to print summary
+    print_WE2E_summary(expts_dict, args.debug)
diff --git a/tests/WE2E/monitor_jobs.yaml b/tests/WE2E/WE2E_tests.yaml
similarity index 100%
rename from tests/WE2E/monitor_jobs.yaml
rename to tests/WE2E/WE2E_tests.yaml
diff --git a/tests/WE2E/create_WE2E_resource_summary.py b/tests/WE2E/create_WE2E_resource_summary.py
deleted file mode 100644 index 5095a9fe69..0000000000 --- a/tests/WE2E/create_WE2E_resource_summary.py +++ /dev/null @@ -1,187 +0,0 @@ -''' -Generate a summary of resources used for the WE2E test suite. - -Examples: - - To print usage - - python create_WE2E_resource_summary.py - python create_WE2E_resource_summary.py -h - - To print a report for all the experiments in an experiment directory - - python create_WE2E_resource_summary.py -e /path/to/expt_dir - - To print a report for all the grid_* and nco_* experiments. - - python create_WE2E_resource_summary.py -e /path/to/expt_dir \ - -n 'grid*' 'nco*' - - To compute a total estimated cost for all experiments on instances that are - $0.15 per core hour. - - python create_WE2E_resource_summary.py -e /path/to/expt_dir -c $0.15 - -Information about the output summary. - - - The core hours are an underestimate in many cases. - - Multiple tries are not captured. - - The use of a portion of a node or instance is not known. If the whole node - is used, but isn't reflected in the core count, the cores are not counted. - Partition information is not stored in the database, so mapping to a given - node type becomes ambiguous. - - For example, jobs that request 4 nodes with 2 processors per node with an - --exclusive flag will underestimate the total core hour usage by a factor - of 20 when using a 40 processor node. - - - When computing cost per job, it will also provide an underestimate for the - reasons listed above. - - Only one cost will be applied across all jobs. Rocoto jobs do not store - partition information in the job table, so was not included as an option here. - -''' - -import argparse -import glob -import os -import sys -import sqlite3 - -REPORT_WIDTH = 110 - -def parse_args(argv): - - - ''' - Function maintains the arguments accepted by this script. Please see - Python's argparse documenation for more information about settings of each - argument. - ''' - - parser = argparse.ArgumentParser( - description="Generate a usage report for a set of SRW experiments." - ) - - parser.add_argument( - '-e', '--expt_path', - help='The path to the directory containing the experiment \ - directories', - ) - parser.add_argument( - '-n', '--expt_names', - default=['*'], - help='A list of experiments to generate the report for. Wildcards \ - accepted by glob.glob may be used. If not provided, a report will be \ - generated for all experiments in the expt_path that have a Rocoto \ - database', - nargs='*', - ) - - # Optional - parser.add_argument( - '-c', '--cost_per_core_hour', - help='Provide the cost per core hour for the instance type used. \ - Only supports homogenous clusters.', - type=float, - ) - - return parser.parse_args(argv) - -def get_workflow_info(db_path): - - ''' Given the path to a Rocoto database, return the total number of tasks, - core hours and wall time for the workflow. 
''' - - con = sqlite3.connect(db_path) - cur = con.cursor() - - # jobs schema is: - # (id INTEGER PRIMARY KEY, jobid VARCHAR(64), taskname VARCHAR(64), cycle - # DATETIME, cores INTEGER, state VARCHAR(64), native_state VARCHAR[64], - # exit_status INTEGER, tries INTEGER, nunknowns INTEGER, duration REAL) - # - # an example: - # 5|66993580|make_sfc_climo|1597017600|48|SUCCEEDED|COMPLETED|0|1|0|83.0 - try: - cur.execute('SELECT cores, duration from jobs') - except sqlite3.OperationalError: - return 0, 0, 0 - - workflow_info = cur.fetchall() - - core_hours = 0 - wall_time = 0 - ntasks = 0 - for cores, duration in workflow_info: - core_hours += cores * duration / 3600 - wall_time += duration / 60 - ntasks += 1 - - return ntasks, core_hours, wall_time - - -def fetch_expt_summaries(expts): - - ''' Get the important information from the database of each experiment, and - return a list, sorted by experiment name. ''' - - summaries = [] - for expt in expts: - test_name = expt.split('/')[-1] - db_path = os.path.join(expt, 'FV3LAM_wflow.db') - if not os.path.exists(db_path): - print(f'No FV3LAM_wflow.db exists for expt: {test_name}') - continue - ntasks, core_hours, wall_time = get_workflow_info(db_path) - summaries.append((test_name, ntasks, core_hours, wall_time)) - - return sorted(summaries) - -def generate_report(argv): - - ''' Given user arguments, print a summary of the requested experiments' - usage information, including cost (if requested). ''' - - cla = parse_args(argv) - - experiments = [] - for expt in cla.expt_names: - experiments.extend(glob.glob( - os.path.join(cla.expt_path, expt) - )) - - header = f'{" "*60} Core Hours | Run Time (mins)' - if cla.cost_per_core_hour: - header = f'{header} | Est. Cost ($) ' - - print('-'*REPORT_WIDTH) - print('-'*REPORT_WIDTH) - print(header) - print('-'*REPORT_WIDTH) - - total_ch = 0 - total_cost = 0 - for name, ntasks, ch, wt in fetch_expt_summaries(experiments): - line = f'{name[:60]:<60s} {ch:^12.2f} {wt:^20.1f}' - if cla.cost_per_core_hour: - cost = ch * cla.cost_per_core_hour - line = f'{line} ${cost:<.2f}' - total_cost += cost - total_ch += ch - print(line) - - print('-'*REPORT_WIDTH) - print(f'TOTAL CORE HOURS: {total_ch:6.2f}') - if cla.cost_per_core_hour: - print(f'TOTAL COST: ${cla.cost_per_core_hour * total_ch:6.2f}') - - print('*'*REPORT_WIDTH) - print('WARNING: This data reflects only the job information from the last', - 'logged try. It does not account for the use \n of an entire node, only', - 'the actual cores requested. It may provide an underestimate of true compute usage.') - print('*'*REPORT_WIDTH) - - -if __name__ == "__main__": - generate_report(sys.argv[1:]) diff --git a/tests/WE2E/get_WE2Etest_names_subdirs_descs.sh b/tests/WE2E/get_WE2Etest_names_subdirs_descs.sh deleted file mode 100755 index 2e7c312701..0000000000 --- a/tests/WE2E/get_WE2Etest_names_subdirs_descs.sh +++ /dev/null @@ -1,1633 +0,0 @@ -#!/bin/bash - -# -#----------------------------------------------------------------------- -# -# This file defines a function that gathers and returns information about -# the WE2E tests available in the WE2E testing system. This information -# consists of the test names, the category subdirectories in which the -# test configuration files are located (relative to a base directory), -# the test IDs, and the test descriptions. This function optionally -# also creates a CSV (Comma-Separated Value) file containing various -# pieces of information about each of the workflow end-to-end (WE2E) -# tests. 
These are described in more detail below. -# -# The function takes as inputs the following arguments: -# -# WE2Edir: -# Directory in which the WE2E testing system is located. This system -# consists of the main script for running WE2E tests, various auxiliary -# scripts, and the test configuration files. -# -# generate_csv_file: -# Flag that specifies whether or not a CSV (Comma-Separated Value) file -# containing information about the WE2E tests should be generated. -# -# verbose: -# Optional verbosity flag. Should be set to "TRUE" or "FALSE". Default -# is "FALSE". -# -# outvarname_test_configs_basedir: -# Name of output variable in which to return the base directory of the -# WE2E test configuration files. -# -# outvarname_test_names: -# Name of output array variable in which to return the names of the WE2E -# tests. -# -# outvarname_test_subdirs: -# Name of output array variable in which to return the category subdirectories -# in which the WE2E tests are located. -# -# outvarname_test_ids: -# Name of output array variable in which to return the IDs of the WE2E -# tests. -# -# outvarname_test_descs: -# Name of output array variable in which to return the descriptions of -# the WE2E tests. -# -# Note that any input argument that is not specified in the call to this -# function gets set to a null string in the body of the function. In -# particular, if any of the arguments that start with "outvarname_" -# (indicating that they specify the name of an output variable) are not -# set in the call, the values corresponding to those variables are not -# returned to the calling script or function. -# -# In order to gather information about the available WE2E tests, this -# function sets the local variable test_configs_basedir to the full path -# of the base directory in which the test configuration files (which may -# be ordinary files or symlinks) are located. It sets this as follows: -# -# test_configs_basedir="${WE2Edir}/test_configs" -# -# If the argument outvarname_test_configs_basedir is specified in the -# call to this function, then the value of test_configs_basedir will be -# returned to the calling script or function (in the variable specified -# by outvarname_test_configs_basedir). -# -# The WE2E test configuration files are located in subdirectories under -# the base directory. This function sets the names of these subdirectories -# in the local array category_subdirs. We refer to these as "category" -# subdirectories because they are used for clarity to group the tests -# into categories (instead of putting them all directly under the base -# directory). For example, one category of tests might be those that -# test workflow capabilities such as running multiple cycles and ensemble -# forecasts, another might be those that run various combinations of -# grids, physics suites, and external models for ICs/LBCs, etc. Note -# that if a new category subdirectory is added under test_configs_basedir, -# its name must be added below as a new element in category_subdirs; -# otherwise, this new subdirectory will not be searched for test -# configuration files. Note also that if one of the elements of -# category_subdirs is ".", then this function will also search directly -# under the base directory itself for test configuration files. -# -# Once test_configs_basedir and category_subdirs are set, this function -# searches the category subdirectories for WE2E test configuration files. 
-# In doing so, it assumes that any ordinary file or symlink in the category -# subdirectories having a name of the form -# -# config.${test_name}.sh -# -# is a test configuration file, and it takes the name of the corresponding -# test to be given by whatever test_name in the above file name happens -# to be. Here, by "ordinary" file we mean an item in the file system -# that is not a symlink (or a directory or other more exotic entity). -# Also, for simplicity, we require that any configuration file that is a -# symlink have a target that is an ordinary configuration file, i.e. not -# a symlink. -# -# We allow test configuration files to be symlinks in order to avoid the -# presence of identical configuration files with different names in the -# WE2E testing system. For example, assume there is a test named -# "test_grid1" that is used to test whether the forecast model can run -# on a grid named "grid1", and assume that the configuration file for -# this test is an ordinary file located in a category subdirectory named -# "grids" that contains tests for various grids. Then the full path to -# this configuration file will be -# -# ${test_configs_basedir}/grids/config.test_grid1.sh -# -# Now assume that there is another category subdirectory named "suites" -# that contains configuration files for tests that check whether the -# forecast model can run with various physics suites. Thus, in order to -# have a test that checks whether the forecast model can run successfully -# with a physics suite named "suite1", we might create an ordinary -# configuration file named "config.test_suite1.sh" in "suites" (so that -# the corresponding test name is "test_suite1"). Thus, the full path to -# this configuration file would be -# -# ${test_configs_basedir}/suites/config.test_suite1.sh -# -# Now if test "test_grid1" happens to use physics suite "suite1", then -# we may be able to use that test for testing both "grid1" and "suite1". -# However, we'd still want to have a configuration file in the "suites" -# subdirectory with a test name that makes it clear that the purpose of -# the test is to run using "suite1". Then, since the WE2E testing system -# allows configuration files to by symlinks, instead of copying -# "config.test_grid1.sh" from the "grids" to the "suites" subdirectory -# and renaming it to "config.test_suite1.sh" (which would create two -# identical ordinary configuration files), we could simply make -# "config.test_suite1.sh" in "suites" a symlink to "config.test_grid1.sh" -# in "grids", i.e. -# -# ${test_configs_basedir}/suites/config.test_suite1.sh -# --> ${test_configs_basedir}/grids/config.test_grid1.sh -# -# With this approach, there will be only one ordinary configuration file -# to maintain. Note that there may be more than one symlink pointing to -# the same ordinary configuration file. For example, there may be another -# category subdirectory named "wflow_features" containing tests for -# various workflow features. Then if the test "test_grid1" runs a test -# that, in addition to running the forecast model on "grid1" using the -# "suite1" physics suite also performs subhourly output, then a symlink -# named "config.test_subhourly.sh" can be created under "wflow_features" -# that points to the configuration file "config.test_grid1.sh", i.e. 
-# -# ${test_configs_basedir}/wflow_features/config.test_subhourly.sh -# --> ${test_configs_basedir}/grids/config.test_grid1.sh -# -# Since the WE2E testing system allows configuration files to be symlinks, -# the same WE2E test may be referred to via multiple test names -- the -# test name corresponding to the ordinary configuration file ("test_grid1" -# in the example above) and any one of the test names corresponding to -# any symlinks that have this ordinary file as their target ("test_suite1" -# and "test_subhourly" in the example above). Here, for clarity we will -# refer to the test name derived from the name of the ordinary configuration -# file as the "primary" test name, and we will refer to the test names -# dervied from the symlinks as the alternate test names. Since these -# test names all represent the same actual test, we also assign to each -# group of primary and alternate test names a single test ID. This is -# simply an integer that uniquely identifies each group of primary and -# alternate test names. -# -# For each configuration file (which may be an ordinary file or a symlink) -# found in the category subdirectories, this function saves in local -# arrays the following information about the WE2E files: -# -# 1) The list of all available WE2E test names, both primary and alternate. -# 2) The category subdirectories under the base directory test_configs_basedir -# in which the test configuration files corresponding to each test -# name are located. -# 3) The IDs corresponding to each of the test names. -# 4) The test descriptions (if outvarname_test_descs is specified in the -# call to this function or if generate_csv_file is or gets set to -# "TRUE"; see below). -# -# These local arrays are sorted in order of increasing test ID. Within -# each group of tests that have the same ID, the primary test name is -# listed first followed by zero or more alternate test names. Note also -# that to reduce confusion, we do not allow two or more configuration -# files of the same name anywere under test_configs_basedir (either -# representing the same actual test or different ones). In other words, -# the list of all test names that this function generates cannot contain -# any duplicate names (either primary or alternate). After assembling -# the full list of test names, this function checks for such duplicates -# and exits with an error message if any are found. -# -# The following input arguments to this function specify the names of -# the arrays in which each of the quantities listed above should be -# returned (to the calling script or function): -# -# outvarname_test_names -# outvarname_test_subdirs -# outvarname_test_ids -# outvarname_test_descs -# -# If any of these is not specified in the call to this function, then -# the corresponding quantity will not be returned to the calling script -# or function. -# -# The test descriptions are headers consisting of one or more bash-style -# comment lines at the top of each ordinary test configuraiton file. -# They are extracted from each such file and placed in a local array only -# if one or both of the following conditions are met: -# -# 1) The user explicitly asks for the descriptions to be returned by -# specifying in the call to this function the name of the array in -# which to return them (by setting a value for the argument -# outvarname_test_descs). 
-# 2) A CSV file summarizing the WE2E tests will be generated (see below).
-#
-# For convenience, this function can generate a CSV (comma-separated
-# value) file containing information about the WE2E tests.  If it does,
-# the file will be placed in the main WE2E testing system directory
-# specified by the input argument WE2Edir.  The CSV file can be read
-# into a spreadsheet in Google Sheets (or another similar tool) to get
-# an overview of all the available WE2E tests.  The rows of the CSV file
-# correspond to the primary WE2E tests, and the columns correspond to
-# the (primary) test name, alternate test names (if any), test description,
-# number of times the test calls the forecast model, and values of various
-# SRW App experiment variables for that test.
-#
-# A CSV file will be generated in the directory specified by WE2Edir if
-# one or more of the following conditions hold:
-#
-# 1) The input argument generate_csv_file is set to "TRUE" in the call
-#    to this function.
-# 2) The input argument generate_csv_file is not set in the call to this
-#    function, and a CSV file does not already exist.
-# 3) The input argument generate_csv_file is not set in the call to this
-#    function, a CSV file already exists, and the modification time of
-#    at least one category subdirectory in category_subdirs is later
-#    than that of the CSV file, i.e. the existing CSV file needs to be
-#    updated because the test configuration files may have changed in
-#    some way.
-#
-# A CSV file is not generated if generate_csv_file is explicitly set to
-# "FALSE" in the call to this function (regardless of whether or not a
-# CSV file already exists).  If a CSV file is generated, it is placed in
-# the directory specified by the input argument WE2Edir, and it overwrites
-# any existing copies of the file in that directory.  The contents of
-# each column of the CSV file are described below.
-#
-#-----------------------------------------------------------------------
-#
-function get_WE2Etest_names_subdirs_descs() {
-#
-#-----------------------------------------------------------------------
-#
-# Save current shell options (in a global array).  Then set new options
-# for this script or function.
-#
-#-----------------------------------------------------------------------
-#
-  { save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1
-#
-#-----------------------------------------------------------------------
-#
-# Source constant files.
-#
-#-----------------------------------------------------------------------
-#
-  source_config $USHdir/constants.yaml
-#
-#-----------------------------------------------------------------------
-#
-# Specify the set of valid argument names for this script or function.
-# Then process the arguments provided to it on the command line (which
-# should consist of a set of name-value pairs of the form arg1="value1",
-# arg2="value2", etc).
-#
-#-----------------------------------------------------------------------
-#
-  local valid_args=( \
-    "WE2Edir" \
-    "generate_csv_file" \
-    "verbose" \
-    "outvarname_test_configs_basedir" \
-    "outvarname_test_names" \
-    "outvarname_test_subdirs" \
-    "outvarname_test_ids" \
-    "outvarname_test_descs" \
-    )
-  process_args "valid_args" "$@"
-#
-#-----------------------------------------------------------------------
-#
-# For debugging purposes, print out values of arguments passed to this
-# script.  Note that these will be printed out only if VERBOSE is set to
-# TRUE.
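#
# As an illustration of the name-value convention just described, a call
# to this function might look as follows (the values shown here are
# hypothetical placeholders, chosen only to demonstrate the form of the
# call):
#
#   get_WE2Etest_names_subdirs_descs \
#     WE2Edir="/path/to/ufs-srweather-app/tests/WE2E" \
#     generate_csv_file="TRUE" \
#     outvarname_test_names="test_names" \
#     outvarname_test_ids="test_ids"
#
# On return, the caller's arrays test_names and test_ids would hold the
# sorted test names and corresponding IDs described above.
#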
-#
-#-----------------------------------------------------------------------
-#
-  print_input_args "valid_args"
-#
-#-----------------------------------------------------------------------
-#
-# Set the default value of "verbose" to "FALSE".  Then make sure "verbose"
-# is set to a valid value.
-#
-#-----------------------------------------------------------------------
-#
-  verbose=${verbose:-"FALSE"}
-  check_var_valid_value "verbose" "valid_vals_BOOLEAN"
-  verbose=$(boolify "$verbose")
-#
-#-----------------------------------------------------------------------
-#
-# Declare local variables.
-#
-#-----------------------------------------------------------------------
-#
-  local abs_cost_ref \
-        ac \
-        all_items \
-        alt_test_name \
-        alt_test_names \
-        alt_test_names_subdirs \
-        alt_test_prim_test_names \
-        alt_test_subdir \
-        alt_test_subdirs \
-        array_names_vars_to_extract \
-        array_names_vars_to_extract_orig \
-        category_subdirs \
-        cmd \
-        column_titles \
-        config_fn \
-        crnt_item \
-        crnt_title \
-        csv_delimiter \
-        csv_fn \
-        csv_fp \
-        cwd \
-        default_val \
-        dt_atmos \
-        fcst_len_hrs \
-        get_test_descs \
-        hash_or_null \
-        i \
-        ii \
-        j \
-        jp1 \
-        k \
-        line \
-        mod_time_csv \
-        mod_time_subdir \
-        msg \
-        nf \
-        num_alt_tests \
-        num_category_subdirs \
-        num_cdates \
-        num_cycles_per_day \
-        num_days \
-        num_fcsts \
-        num_fcsts_orig \
-        num_grid_pts \
-        num_items \
-        num_occurrences \
-        num_prim_tests \
-        num_tests \
-        num_time_steps \
-        num_vars_to_extract \
-        prim_array_names_vars_to_extract \
-        prim_test_descs \
-        prim_test_dt_atmos \
-        prim_test_ids \
-        prim_test_name_subdir \
-        prim_test_names \
-        prim_test_num_fcsts \
-        prim_test_rel_cost \
-        prim_test_subdirs \
-        rc \
-        regex_search \
-        rel_cost \
-        row_content \
-        sort_inds \
-        stripped_line \
-        subdir \
-        subdir_fp \
-        subdirs \
-        target_dir \
-        target_fn \
-        target_fp \
-        target_prim_test_name \
-        target_rp \
-        target_test_name_or_null \
-        test_configs_basedir \
-        test_desc \
-        test_descs \
-        test_descs_esc_sq \
-        test_descs_orig \
-        test_descs_str \
-        test_id \
-        test_id_next \
-        test_ids \
-        test_ids_and_inds \
-        test_ids_and_inds_sorted \
-        test_ids_orig \
-        test_ids_str \
-        test_name \
-        test_name_or_null \
-        test_names \
-        test_names_orig \
-        test_names_str \
-        test_subdirs \
-        test_subdirs_orig \
-        test_subdirs_str \
-        test_type \
-        units \
-        val \
-        var_name \
-        var_name_at \
-        vars_to_extract
-
-  local dta \
-        nxny \
-        dta_r \
-        nxny_r
-#
-#-----------------------------------------------------------------------
-#
-# Set variables associated with the CSV (comma-separated value) file that
-# this function may generate.  The conditions under which such a file is
-# generated are described above in the description of this function.
-#
-#-----------------------------------------------------------------------
-#
-# Set the name and full path to the CSV file.
-#
-  csv_fn="WE2E_test_info.csv"
-  csv_fp="${WE2Edir}/${csv_fn}"
-#
-# If generate_csv_file is specified as an input argument in the call to
-# this function, make sure that it is set to a valid value.
-#
-  if [ ! -z "${generate_csv_file}" ]; then
-
-    check_var_valid_value "generate_csv_file" "valid_vals_BOOLEAN"
-    generate_csv_file=$(boolify "${generate_csv_file}")
-#
-# If generate_csv_file was not specified as an input argument in the
-# call to this function, then it will have been set above to a null
-# string.  In this case, if a CSV file doesn't already exist, reset
-# generate_csv_file to "TRUE" so that one will be generated.
If a CSV -# file does exist, get its modification time so that later below, we can -# compare it to the modification times of the category subdirectories -# and determine whether a new CSV file needs to be generated. -# -# Note that the modification "times" obtained here and later below using -# the "stat" utility are the seconds elapsed between Epoch (which is a -# fixed point in time) and the last modification time of the specified -# file, not the dates/times at which the file was last modified. This -# is due to the use of the "--format=%Y" flag in the call to "stat". We -# choose these "seconds since Epoch" units because they make it easier -# to determine which of two files is younger/older (the one with the -# larger seconds-since-Epoch will be the more recently modified file.) -# - else - - if [ ! -f "${csv_fp}" ]; then - mod_time_csv="0" - generate_csv_file="TRUE" - else - mod_time_csv=$( stat --format=%Y "${csv_fp}" ) - fi - - fi - - if [ "${generate_csv_file}" = "TRUE" ]; then - print_info_msg " -Will generate a CSV (Comma Separated Value) file (csv_fp) containing -information on all WE2E tests: - csv_fp = \"${csv_fp}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set the base directory containing the WE2E test configuration files -# (or, more precisely, containing the category subdirectories in which -# the configuration files are located). -# -#----------------------------------------------------------------------- -# - test_configs_basedir="${WE2Edir}/test_configs" -# -#----------------------------------------------------------------------- -# -# Set the array category_subdirs that specifies the subdirectories under -# test_configs_basedir in which to search for WE2E test configuration -# files. Note that if "." is included as one of the elements of this -# array, then the base directory itself will also be searched. -# -#----------------------------------------------------------------------- -# - category_subdirs=( \ - "." \ - "grids_extrn_mdls_suites_community" \ - "grids_extrn_mdls_suites_nco" \ - "release_SRW_v1" \ - "verification" \ - "wflow_features" \ - ) - num_category_subdirs="${#category_subdirs[@]}" - - orig_dir=$(pwd) -# -#----------------------------------------------------------------------- -# -# Loop over the category subdirectories under test_configs_basedir -# (possibly including the base directory itself). In each subdirectory, -# consider all items that have names of the form -# -# config.${test_name}.sh -# -# and that are either ordinary files (i.e. not symlinks) or are symlinks -# whose targets are ordinary files having names of the form above. For -# each item that is an ordinary file, save the corresponding primary test -# name, the category subdirectory in which the item is located, and the -# test ID in the arrays -# -# prim_test_names -# prim_test_subdirs -# prim_test_ids -# -# respectively. For each item that is a symlink to an ordinary file, -# save the alternate test name corresponding to the symlink name, the -# category subdirectory in which the symlink is located, and the test -# name derived from the name of the symlink's target (i.e. the primary -# test name that this alternate test name corresponds to) in the arrays -# -# alt_test_names -# alt_test_subdirs -# alt_test_prim_test_names -# -# respectively. 
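#
# Stripped of the many validity checks performed below, the classification
# just described amounts to the following sketch (a simplified
# illustration, not the actual code used in this function):
#
#   prim_test_names=(); alt_test_names=()
#   for item in config.*.yaml; do
#     name="${item#config.}"; name="${name%.yaml}"
#     if [ -h "$item" ]; then
#       alt_test_names+=("$name")    # symlink => alternate test name
#     elif [ -f "$item" ]; then
#       prim_test_names+=("$name")   # ordinary file => primary test name
#     fi
#   done
#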
-#
-#-----------------------------------------------------------------------
-#
-  prim_test_names=()
-  prim_test_ids=()
-  prim_test_subdirs=()
-  prim_test_num_fcsts=()
-  prim_test_dt_atmos=()
-  prim_test_rel_cost=()
-
-  alt_test_names=()
-  alt_test_subdirs=()
-  alt_test_prim_test_names=()
-#
-# Initialize the counter that will be used to assign test IDs to the
-# primary test names.  This will be incremented below every time a new
-# primary test name is found.  Note that we do not yet assign IDs to the
-# alternate test names.  These will be assigned IDs later below that
-# will be identical to the IDs of the primary test names they correspond
-# to.
-#
-  test_id="0"
-
-  for (( i=0; i<=$((num_category_subdirs-1)); i++ )); do
-
-    subdir="${category_subdirs[$i]}"
-    subdir_fp="${test_configs_basedir}/$subdir"
-#
-# If at this point in the code generate_csv_file is still set to a null
-# string, it means that a CSV file containing information about the WE2E
-# tests already exists.  In this case, a new version of this file needs
-# to be generated only if one or more of the category subdirectories
-# have modification times that are later than that of the existing CSV
-# file.  Check for this condition and set generate_csv_file accordingly.
-# Note that this if-statement will be executed at most once since it sets
-# generate_csv_file to "TRUE", after which the test for entering the if-
-# statement will be false.
-#
-    if [ -z "${generate_csv_file}" ]; then
-      if compgen -G "${subdir_fp}/*.yaml" > /dev/null; then
-        mod_time_subdir=$( stat --format=%Y "${subdir_fp}"/*.yaml | sort -n | tail -1 )
-      else
-        mod_time_subdir="0"
-      fi
-      if [ "${mod_time_subdir}" -gt "${mod_time_csv}" ]; then
-        generate_csv_file="TRUE"
-        print_info_msg "
-The current category subdirectory (subdir) has a modification time
-(mod_time_subdir) that is later than the modification time (mod_time_csv)
-of the existing CSV file (csv_fp) containing WE2E test information:
-  subdir = \"${subdir}\"
-  mod_time_subdir = \"${mod_time_subdir}\" (in units of seconds since Epoch)
-  mod_time_csv = \"${mod_time_csv}\" (in units of seconds since Epoch)
-  csv_fp = \"${csv_fp}\"
-Thus, the CSV file must be updated.  Setting generate_csv_file to \"TRUE\"
-to generate a new CSV file:
-  generate_csv_file = \"${generate_csv_file}\""
-      fi
-    fi
-#
-# Change location to the current category subdirectory.
-#
-    cd_vrfy "${subdir_fp}"
-#
-# Get the contents of the current subdirectory.  We consider each item
-# that has a name of the form
-#
-#   config.${test_name}.yaml
-#
-# to be a WE2E test configuration file, and we take the name of the test
-# to be whatever ${test_name} in the above expression corresponds to.
-# We ignore all other items in the subdirectory.
-#
-    all_items=( $(ls -1) )
-    num_items="${#all_items[@]}"
-    for (( j=0; j<=$((num_items-1)); j++ )); do
-
-      crnt_item="${all_items[$j]}"
-#
-# Try to extract the name of the test from the name of the current item
-# and place the result in test_name_or_null.  test_name_or_null will
-# contain the name of the test only if the item has a name of the form
-# "config.${test_name}.yaml", in which case it will be equal to ${test_name}.
-# Otherwise, it will be a null string.
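#
# For example (using a hypothetical test name), the extraction described
# above behaves as follows:
#
#   printf "config.grid_RRFS_CONUS_25km.yaml\n" \
#     | sed -n -r -e "s/^config\.(.*)\.yaml$/\1/p"   # prints "grid_RRFS_CONUS_25km"
#   printf "README.md\n" \
#     | sed -n -r -e "s/^config\.(.*)\.yaml$/\1/p"   # prints nothing
#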
-# - regex_search="^config\.(.*)\.yaml$" - test_name_or_null=$( printf "%s\n" "${crnt_item}" | \ - sed -n -r -e "s/${regex_search}/\1/p" ) -# -#----------------------------------------------------------------------- -# -# Take further action for this item only if it has a name of the form -# above expected for a WE2E test configuration file, which will be the -# case only if test_name_or_null is not a null string. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${test_name_or_null}" ]; then -# -#----------------------------------------------------------------------- -# -# Use bash's -h conditional operator to check whether the current item -# (which at this point is taken to be a test configuration file) is a -# symlink. If it is a symlink, the only type of entity we allow the -# target to be is an existing ordinary file. In particular, to keep the -# WE2E testing system simple, we do not allow the target to be a symlink. -# Of course, it also cannot be a directory or other exotic entity. Below, -# we check for these various possibilities and only allow the case of the -# target being an existing ordinary file. -# -#----------------------------------------------------------------------- -# - if [ -h "${crnt_item}" ]; then -# -# Extract the name of the test from the name of the symlink and append -# it to the array alt_test_names. Also, append the category subdirectory -# under test_configs_basedir in which the symlink is located to the array -# alt_test_subdirs. -# - alt_test_names+=("${test_name_or_null}") - alt_test_subdirs+=("$subdir") -# -# Get the full path to the target of the symlink without following targets -# that are themselves symlinks. The "readlink" utility without any flags -# (such as -f) can do this, but when -f is omitted, it returns a relative -# path. To convert that relative path to an absolute path without resolving -# symlinks, use the "realpath" utility with the -s flag. -# - target_rp=$( readlink "${crnt_item}" ) - target_fp=$( realpath -s "${target_rp}" ) -# -# Use bash's -h conditional operator to check whether the target itself -# is a symlink. For simplicity, this is not allowed. Thus, in this -# case, print out an error message and exit. -# - if [ -h "${target_fp}" ]; then - cwd="$(pwd)" - print_err_msg_exit "\ -The symlink (crnt_item) in the current directory (cwd) has a target -(target_fp) that is itself a symlink: - cwd = \"${cwd}\" - crnt_item = \"${crnt_item}\" - target_fp = \"${target_fp}\" -This is not allowed. Please ensure that the current item points to an -ordinary file (i.e. not a symlink) and rerun." - fi -# -# Now use bash's -f conditional operator to check whether the target is -# a "regular" file (as defined by bash). Note that this test will return -# false if the target is a directory or does not exist and true otherwise. -# Thus, the negation of this test applied to the target (i.e. ! -f) that -# we use below will be true if the target is not an existing file. In -# this case, we print out an error message and exit. -# -# Note also that the -f operator recursively follows a symlink passed to -# it as an argument. For this reason, we need to first perform the -h -# test above to check that the target (without resolving symlinks) is -# itself not a symlink. The -f test below does not help in this regard. -# - if [ ! 
-f "${target_fp}" ]; then - cwd="$(pwd)" - print_err_msg_exit "\ -The symlink (crnt_item) in the current directory (cwd) has a target -(target_fp) that is not an existing ordinary file: - cwd = \"${cwd}\" - crnt_item = \"${crnt_item}\" - target_fp = \"${target_fp}\" -This is probably because either the target doesn't exist or is a directory, -neither of which is allowed because the symlink must point to an ordinary -(i.e. non-symlink) WE2E test configuration file. Please either point the -symlink to such a file or remove it, then rerun." - fi -# -# Get the name of the directory in which the target is located. -# - target_dir=$( dirname "${target_fp}" ) -# -# Next, check whether the directory in which the target is located is -# under the base directory of the WE2E test configuration files (i.e. -# test_configs_basedir). We require that the target be located in one -# of the subdirectories under test_configs_basedir (or directly under -# test_configs_basedir itself) because we don't want to deal with tests -# that have configuration files that may be located anywhere in the file -# system; for simplicity, we want all configuration files to be placed -# somewhere under test_configs_basedir. -# -# Note that the bash parameter expansion ${var/search/replace} returns -# $var but with the first instance of "search" replaced by "replace" if -# the former is found in $var. Otherwise, it returns the original $var. -# If "replace" is omitted, then "search" is simply deleted. Thus, in -# the if-statement below, if ${target_dir/${test_configs_basedir}/} -# returns ${target_dir} without changes (in which case the test in the -# if-statment will evaluate to true), it means ${test_configs_basedir} -# was not found within ${target_dir}. That in turn means ${target_dir} -# is not a location under ${test_configs_basedir}. In this case, print -# out a warning and exit. -# - if [ "${target_dir}" = "${target_dir/${test_configs_basedir}/}" ]; then - cwd="$(pwd)" - print_err_msg_exit "\ -The symlink (crnt_item) in the current directory (cwd) has a target -(target_fp) located in a directory (target_dir) that is not somewhere -under the WE2E tests base directory (test_configs_basedir): - cwd = \"${cwd}\" - crnt_item = \"${crnt_item}\" - target_fp = \"${target_fp}\" - target_dir = \"${target_dir}\" - test_configs_basedir = \"${test_configs_basedir}\" -For clarity, we require all WE2E test configuration files to be located -somewhere under test_configs_basedir (either directly in this base -directory on in a subdirectory). Please correct and rerun." - fi -# -# Finally, check whether the name of the target file is in the expected -# format "config.${test_name}.sh" for a WE2E test configuration file. -# If not, print out a warning and exit. -# - target_fn=$( basename "${target_fp}" ) - target_test_name_or_null=$( printf "%s\n" "${target_fn}" | \ - sed -n -r -e "s/${regex_search}/\1/p" ) - if [ -z "${target_test_name_or_null}" ]; then - cwd="$(pwd)" - print_err_msg_exit "\ -The symlink (crnt_item) in the current directory (cwd) has a target -(target_fn; located in the directory target_dir) with a name that is -not in the form \"config.[test_name].sh\" expected for a WE2E test -configuration file: - cwd = \"${cwd}\" - crnt_item = \"${crnt_item}\" - target_dir = \"${target_dir}\" - target_fn = \"${target_fn}\" -Please either rename the target to have the form specified above or -remove the symlink, then rerun." 
-        fi
-#
-# Now that all the checks above have succeeded, for later use save the
-# name of the WE2E test that the target represents in the array
-# alt_test_prim_test_names.
-#
-        alt_test_prim_test_names+=("${target_test_name_or_null}")
-#
-#-----------------------------------------------------------------------
-#
-# If the current item is not a symlink...
-#
-#-----------------------------------------------------------------------
-#
-      else
-#
-# Check if the current item is a "regular" file (as defined by bash) and
-# thus not a directory or some other exotic entity.  If it is a regular
-# file, save the corresponding WE2E test name and category subdirectory
-# in the arrays prim_test_names and prim_test_subdirs, respectively.
-# Also, set its test ID and save it in the array prim_test_ids.  If the
-# current item is not a regular file, print out an error message and exit.
-#
-        if [ -f "${crnt_item}" ]; then
-          prim_test_names+=("${test_name_or_null}")
-          prim_test_subdirs+=("${subdir}")
-          test_id=$((test_id+1))
-          prim_test_ids+=("${test_id}")
-        else
-          cwd="$(pwd)"
-          print_err_msg_exit "\
-The item (crnt_item) in the current directory (cwd) is not a symlink,
-but it is also not a \"regular\" file (i.e. it fails bash's -f conditional
-operator):
-  cwd = \"${cwd}\"
-  crnt_item = \"${crnt_item}\"
-  [ -f "${crnt_item}" ] = $([ -f "${crnt_item}" ])
-This is probably because it is a directory.  Please correct and rerun."
-        fi
-
-      fi
-
-      fi
-
-    done
-
-  done
-#
-# For later use, save the number of primary and alternate test names in
-# variables.
-#
-  num_prim_tests="${#prim_test_names[@]}"
-  num_alt_tests="${#alt_test_names[@]}"
-#
-# Change location back to original directory.
-#
-  cd_vrfy "${orig_dir}"
-#
-#-----------------------------------------------------------------------
-#
-# Create the array test_names that contains both the primary and alternate
-# test names found above (with the list of primary names first followed
-# by the list of alternate names).  Also, create the array test_subdirs
-# that contains the category subdirectories corresponding to these test
-# names.
-#
-#-----------------------------------------------------------------------
-#
-  test_names=("${prim_test_names[@]}")
-  test_subdirs=("${prim_test_subdirs[@]}")
-  if [ "${num_alt_tests}" -gt "0" ]; then
-    test_names+=("${alt_test_names[@]:-}")
-    test_subdirs+=("${alt_test_subdirs[@]:-}")
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# For simplicity, make sure that each test name (either primary or
-# alternate) appears exactly once in the array test_names.  This is
-# equivalent to requiring that a test configuration file (ordinary file
-# or symlink) corresponding to each name appear exactly once anywhere
-# under the base directory test_configs_basedir.
-#
-#-----------------------------------------------------------------------
-#
-  num_tests="${#test_names[@]}"
-  for (( i=0; i<=$((num_tests-1)); i++ )); do
-
-    test_name="${test_names[$i]}"
-
-    subdirs=()
-    num_occurrences=0
-    for (( j=0; j<=$((num_tests-1)); j++ )); do
-      if [ "${test_names[$j]}" = "${test_name}" ]; then
-        num_occurrences=$((num_occurrences+1))
-        subdirs+=("${test_subdirs[$j]}")
-      fi
-    done
-
-    if [ "${num_occurrences}" -ne "1" ]; then
-      print_err_msg_exit "\
-There must be exactly one WE2E test configuration file (which may be an
-ordinary file or a symlink) corresponding to each test name anywhere
-under the base directory test_configs_basedir.
However, the number of
-configuration files (num_occurrences) corresponding to the current test
-name (test_name) is not 1:
-  test_configs_basedir = \"${test_configs_basedir}\"
-  test_name = \"${test_name}\"
-  num_occurrences = ${num_occurrences}
-These configuration files all have the name
-  \"config.${test_name}.yaml\"
-and are located in the following category subdirectories under
-test_configs_basedir:
-  subdirs = ( $( printf "\"%s\" " "${subdirs[@]}" ))
-Please rename or remove all but one of these configuration files so that
-they correspond to unique test names and rerun."
-    fi
-
-  done
-#
-#-----------------------------------------------------------------------
-#
-# If the input argument outvarname_test_descs is not set to a null string
-# (meaning that the name of the array in which to return the WE2E test
-# descriptions is specified in the call to this function), or if the flag
-# generate_csv_file is set to "TRUE", we need to obtain the WE2E test
-# descriptions from the test configuration files.  In these cases, set
-# the local variable get_test_descs to "TRUE".  Otherwise, set it to
-# "FALSE".
-#
-#-----------------------------------------------------------------------
-#
-  get_test_descs="FALSE"
-  if [ ! -z "${outvarname_test_descs}" ] || \
-     [ "${generate_csv_file}" = "TRUE" ]; then
-    get_test_descs="TRUE"
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# If get_test_descs is set to "TRUE", loop through all the primary test
-# names and extract from the configuration file of each the description
-# of the test.  This is assumed to be a section of (bash) comment lines
-# at the top of the configuration file.  Then append the test description
-# to the array prim_test_descs.  Note that we assume the first non-comment
-# line at the top of the configuration file indicates the end of the test
-# description header.
-#
-#-----------------------------------------------------------------------
-#
-  if [ "${get_test_descs}" = "TRUE" ]; then
-#
-# Specify in "vars_to_extract" the list of experiment variables to extract
-# from each test configuration file (and later to place in the CSV file).
-# Recall that the rows of the CSV file correspond to the various WE2E
-# tests, and the columns correspond to the test name, description, and
-# experiment variable values.  The elements of "vars_to_extract" should
-# be the names of SRW App experiment variables that are (or can be)
-# specified in the App's configuration file.  Note that if a variable is
-# not specified in the test configuration file, in most cases its value
-# is set to an empty string (and recorded as such in the CSV file).  In
-# some cases, it is set to some other value (e.g. for the number of
-# ensemble members NUM_ENS_MEMBERS, it is set to 1).
-#
-    vars_to_extract=( "PREDEF_GRID_NAME" \
-                      "CCPP_PHYS_SUITE" \
-                      "EXTRN_MDL_NAME_ICS" \
-                      "EXTRN_MDL_NAME_LBCS" \
-                      "DATE_FIRST_CYCL" \
-                      "DATE_LAST_CYCL" \
-                      "INCR_CYCL_FREQ" \
-                      "FCST_LEN_HRS" \
-                      "LBC_SPEC_INTVL_HRS" \
-                      "NUM_ENS_MEMBERS" \
-                    )
-    num_vars_to_extract="${#vars_to_extract[@]}"
-#
-# Create names of local arrays that will hold the value of the corresponding
-# variable for each test.  Then use these names to define them as empty
-# arrays.  [The arrays named "prim_..." are to hold values for only the
-# primary tests, while other arrays are to hold values for all (primary
-# plus alternate) tests.]
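#
# For instance, with a two-variable list (shortened here purely for
# illustration), the name-generation step below expands as follows:
#
#   vars=( "PREDEF_GRID_NAME" "CCPP_PHYS_SUITE" )
#   names=( $( printf "prim_test_%s_vals " "${vars[@]}" ) )
#   echo "${names[@]}"
#   # => prim_test_PREDEF_GRID_NAME_vals prim_test_CCPP_PHYS_SUITE_vals
#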
-#
-    prim_array_names_vars_to_extract=( $( printf "prim_test_%s_vals " "${vars_to_extract[@]}" ) )
-    array_names_vars_to_extract=( $( printf "%s_vals " "${vars_to_extract[@]}" ) )
-    for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-      cmd="local ${prim_array_names_vars_to_extract[$k]}=()"
-      eval $cmd
-      cmd="local ${array_names_vars_to_extract[$k]}=()"
-      eval $cmd
-    done
-
-    print_info_msg "
-Gathering test descriptions and experiment variable values from the
-configuration files of the primary WE2E tests...
-"
-
-    prim_test_descs=()
-    for (( i=0; i<=$((num_prim_tests-1)); i++ )); do
-
-      test_name="${prim_test_names[$i]}"
-      subdir="${prim_test_subdirs[$i]}"
-      print_info_msg "\
-  Reading in the test description for primary WE2E test:  \"${test_name}\"
-  In category (subdirectory):  \"${subdir}\"
-"
-      cd_vrfy "${test_configs_basedir}/$subdir"
-#
-# Extract the test description from the "metadata" section of the current
-# test's configuration file.  The string returned by config_to_yaml_str
-# includes the leading key and a trailing character, which the two
-# parameter expansions below strip off, leaving just the description text
-# in the local variable test_desc.
-#
-      config_fn="config.${test_name}.yaml"
-      config_fp="${test_configs_basedir}/$subdir/$config_fn"
-      test_desc="$(config_to_yaml_str $config_fp -k "metadata")"
-      test_desc="${test_desc:27}"
-      test_desc="${test_desc::${#test_desc}-1}"
-#
-# Finally, save the description of the current test as the next element
-# of the array prim_test_descs.
-#
-      prim_test_descs+=("${test_desc}")
-#
-# Get from the current test's configuration file the values of the
-# variables specified in "vars_to_extract".  Then save the value in the
-# arrays specified by "prim_array_names_vars_to_extract".
-#
-      config_content=$(config_to_shell_str $config_fp)
-      for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-
-        var_name="${vars_to_extract[$k]}"
-        set +e
-        cmd=$( grep "^[ ]*${var_name}=" <<< "${config_content}" )
-        set -e
-        eval $cmd
-
-        if [ -z "${!var_name+x}" ]; then
-
-          msg="
-  The variable \"${var_name}\" is not defined in the current test's
-  configuration file (config_fn):
-    config_fn = \"${config_fn}\"
-  Setting the element in the array \"${prim_array_names_vars_to_extract[$k]}\"
-  corresponding to this test to"
-
-          case "${var_name}" in
-
-          "NUM_ENS_MEMBERS")
-            default_val="1"
-            msg=$msg":
-    ${var_name} = \"${default_val}\""
-            ;;
-
-          "INCR_CYCL_FREQ")
-            default_val="24"
-            msg=$msg":
-    ${var_name} = \"${default_val}\""
-            ;;
-
-          *)
-            default_val=""
-            msg=$msg" an empty string."
-            ;;
-
-          esac
-          cmd="${var_name}=\"${default_val}\""
-          eval $cmd
-
-          print_info_msg "$verbose" "$msg"
-          cmd="${prim_array_names_vars_to_extract[$k]}+=(\"'${default_val}\")"
-
-        else
-#
-# The following are important notes regarding how the variable "cmd"
-# containing the command that will append an element to the array
-# specified by ${prim_array_names_vars_to_extract[$k]} is formulated:
-#
-# 1) If all the experiment variables were scalars, then the more complex
-#    command below could be replaced with the following:
-#
-#      cmd="${prim_array_names_vars_to_extract[$k]}+=(\"${!var_name}\")"
-#
-#    But some variables are arrays, so we need the more complex approach
-#    to cover those cases.
-#
-# 2) The double quotes (which need to be escaped here, i.e.
\") are needed -# so that for any experiment variables that are arrays, all the elements of -# the array are combined together and treated as a single element. For -# example, if a variable CYCL_HRS is set to the array ("00" "12"), we want -# the value saved in the local array here to be a single element consisting -# of "00 12". Otherwise, "00" and "12" will be treated as separate -# elements, and more than one element would be added to the array (which -# would be incorrect here). -# -# 3) The single quote (which needs to be escaped here, i.e. \') is needed -# so that any numbers (e.g. a set of cycle hours such as "00 12") are -# treated as strings when the CSV file is opened in Google Sheets. -# If this is not done, Google Sheets will remove leading zeros. -# - var_name_at="${var_name}[@]" - cmd="${prim_array_names_vars_to_extract[$k]}+=(\'\"${!var_name_at}\")" - fi - eval $cmd - - done -# -# Calculate the number of forecasts that will be launched by the current -# test. The "10#" forces bash to treat the following number as a decimal -# (not hexadecimal, etc). Note that INCR_CYCL_FREQ is in units of hours, -# so the factor of 3600 is needed to convert the number of seconds to hours. -# - # Convert cycles to seconds - if [[ $DATE_FIRST_CYCL != [0-9]* ]]; then - DATE_FIRST_CYCL=$(eval ${DATE_FIRST_CYCL}) - fi - if [[ $DATE_LAST_CYCL != [0-9]* ]]; then - DATE_LAST_CYCL=$(eval ${DATE_LAST_CYCL}) - fi - first=$(date --utc --date "${DATE_FIRST_CYCL:0:8} ${DATE_FIRST_CYCL:8:2}" +"%s") - last=$(date --utc --date "${DATE_LAST_CYCL:0:8} ${DATE_LAST_CYCL:8:2}" +"%s") - # Diff and convert seconds to number of cycles where freq is in - # hours - nf=$(( ($last - $first) / 3600 / 10#${INCR_CYCL_FREQ} )) -# -# Save the number of forecasts launched by the current test in an -# appropriately named array. In the following, the single quote at the -# beginning forces Google Sheets to interpret this quantity as a string. -# This prevents any automatic number fomatting from being applied when -# the CSV file is imported into Google Sheets. -# - prim_test_num_fcsts+=( "'$nf" ) -# -#----------------------------------------------------------------------- -# -# Calculate the relative dynamics cost of the test, i.e. the relative -# cost of running only the dynamics portion of the forecast model. Here, -# we define the absolute cost of running the dynamics as -# -# abs_cost = nx*ny*num_time_steps*num_fcsts -# -# where nx and ny are the horizontal dimensions of the grid, num_time_steps -# is the number of time steps that need to be taken to complete one -# forecast within the test, and num_fcsts are the number of forecasts -# the test makes (e.g. if the test performs an ensemble forecast, the -# value of this parameter will be greater than 1). -# -# The relative cost is obtained by dividing the absolute cost of a test -# by the absolute cost of a reference 6-hour forecast on the RRFS_CONUS_25km -# predefined grid using the default time step for that grid. This is -# calculated later below and saved in the variable abs_cost_ref. Thus, -# the relative cost is given by -# -# rel_cost = abs_cost/abs_cost_ref -# -# defined as abs_cost_ref. -# -# Note that the (absolute or relative) cost defined here does not take -# into account the costs of running different physics suites, nor does -# it take into account the costs of workflow tasks other than the forecast -# task (e.g. 
generation of initial and boundary conditions, post processing,
-# verification, etc; that is why it is referred to as the relative DYNAMICS
-# cost).  Note also that if in the future the number of levels in the
-# vertical becomes a user-specified parameter, that will also have to be
-# added to the definition of the cost.
-#
-#-----------------------------------------------------------------------
-#
-
-#
-# To calculate the absolute cost as defined above, we need the number of
-# points in the two horizontal directions, nx and ny.  Also, to calculate
-# the number of time steps, we need the size of the time step (dt_atmos).
-# These depend on the grid being used and must be extracted from the grid
-# parameters.  The way the latter are obtained depends on whether or not
-# a predefined grid is being used.
-#
-params=$(\
-  PREDEF_GRID_NAME="${PREDEF_GRID_NAME}" \
-  QUILTING="FALSE" \
-  $USHdir/calculate_cost.py -c "${test_configs_basedir}/$subdir/${config_fn}")
-
-read dta nxny dta_r nxny_r <<< "${params}"
-
-#
-# Save the value of dta (which is just dt_atmos) in an array.  The single
-# quote at the beginning forces Google Sheets to interpret this quantity
-# as a string.  This prevents any automatic number formatting from being
-# applied when the CSV file is imported into Google Sheets.
-#
-      prim_test_dt_atmos+=( "'${dta}" )
-#
-# Calculate the total number of horizontal grid points.
-#
-      num_grid_pts=$nxny
-#
-# Calculate the number of time steps for the test.  Note that FCST_LEN_HRS
-# is in units of hours while dta is in units of seconds.  Also, the term
-# dta - 1 in the numerator is to cause the division to round up to the
-# nearest integer (adding the denominator minus one to the numerator will
-# make this happen).
-#
-      num_time_steps=$(( (FCST_LEN_HRS*3600 + dta - 1)/dta ))
-#
-# Calculate the absolute cost of the test.
-#
-      ac=$(( num_grid_pts*num_time_steps*nf ))
-#
-# Save the absolute cost for this test in the array that will eventually
-# contain the relative cost.  The values in this array will be divided
-# by abs_cost_ref later below to obtain relative costs.
-#
-      prim_test_rel_cost+=( "$ac" )
-#
-# Unset the experiment variables defined for the current test so that
-# they are not accidentally used for the next one.
-#
-      for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-        var_name="${vars_to_extract[$k]}"
-        cmd="unset ${var_name}"
-        eval $cmd
-      done
-
-    done # End loop over primary tests
-#
-#-----------------------------------------------------------------------
-#
-# Normalize the absolute costs calculated above for each test by the
-# absolute cost of a reference 6-hour forecast on the RRFS_CONUS_25km
-# predefined grid (using the default time step for that grid).
-#
-#-----------------------------------------------------------------------
-#
-    num_grid_pts=$nxny_r
-    fcst_len_hrs="6"
-    num_time_steps=$(( (fcst_len_hrs*3600 + dta_r - 1)/dta_r ))
-    abs_cost_ref=$(( num_grid_pts*num_time_steps ))
-
-    for (( i=0; i<=$((num_prim_tests-1)); i++ )); do
-#
-# In the following, the single quote at the beginning forces Google Sheets
-# to interpret this quantity as a string.  This prevents any automatic
-# number formatting from being applied when the CSV file is imported into
-# Google Sheets.
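#
# As a worked illustration of the division performed just below (using
# made-up round numbers rather than the actual RRFS_CONUS_25km
# parameters): with nxny_r = 200*100 = 20000 points and dta_r = 40 s,
# the reference cost computed above would be
#
#   num_time_steps = (6*3600 + 40 - 1)/40 = 540   (integer division)
#   abs_cost_ref   = 20000*540 = 10800000
#
# so a test whose absolute cost came out to 21600000 would be assigned
# a relative cost of 2, i.e. roughly twice the reference forecast.
#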
-#
-      prim_test_rel_cost[$i]="'"$( printf "%g" \
-        $( bc -l <<< " ${prim_test_rel_cost[$i]}/${abs_cost_ref}" ) )
-    done
-
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Create the arrays test_ids and test_descs that initially contain the
-# test IDs and descriptions corresponding to the primary test names
-# (those of the alternate test names will be appended below).  Then, in
-# the for-loop, do the same for the arrays containing the experiment
-# variable values for each test.
-#
-#-----------------------------------------------------------------------
-#
-  test_ids=("${prim_test_ids[@]}")
-  if [ "${get_test_descs}" = "TRUE" ]; then
-    test_descs=("${prim_test_descs[@]}")
-    num_fcsts=("${prim_test_num_fcsts[@]}")
-    dt_atmos=("${prim_test_dt_atmos[@]}")
-    rel_cost=("${prim_test_rel_cost[@]}")
-    for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-      cmd="${array_names_vars_to_extract[$k]}=(\"\${${prim_array_names_vars_to_extract[$k]}[@]}\")"
-      eval $cmd
-    done
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Append to the arrays test_ids and test_descs the test IDs and descriptions
-# of the alternate test names.  We set the test ID and description of
-# each alternate test name to those of the corresponding primary test
-# name.  Then, in the inner for-loop, do the same for the arrays containing
-# the experiment variable values.
-#
-#-----------------------------------------------------------------------
-#
-  for (( i=0; i<=$((num_alt_tests-1)); i++ )); do
-
-    alt_test_name="${alt_test_names[$i]}"
-    alt_test_subdir=("${alt_test_subdirs[$i]}")
-    target_prim_test_name="${alt_test_prim_test_names[$i]}"
-
-    num_occurrences=0
-    for (( j=0; j<=$((num_prim_tests-1)); j++ )); do
-      if [ "${prim_test_names[$j]}" = "${target_prim_test_name}" ]; then
-        test_ids+=("${prim_test_ids[$j]}")
-        if [ "${get_test_descs}" = "TRUE" ]; then
-          test_descs+=("${prim_test_descs[$j]}")
-          num_fcsts+=("${prim_test_num_fcsts[$j]}")
-          dt_atmos+=("${prim_test_dt_atmos[$j]}")
-          rel_cost+=("${prim_test_rel_cost[$j]}")
-          for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-            cmd="${array_names_vars_to_extract[$k]}+=(\"\${${prim_array_names_vars_to_extract[$k]}[$j]}\")"
-            eval $cmd
-          done
-        fi
-        num_occurrences=$((num_occurrences+1))
-      fi
-    done
-
-    if [ "${num_occurrences}" -ne 1 ]; then
-      print_err_msg_exit "\
-Each alternate test name must have a corresponding primary test name that
-occurs exactly once in the full list of primary test names.  For the
-current alternate test name (alt_test_name), the number of occurrences
-(num_occurrences) of the corresponding primary test name (target_prim_test_name)
-is not 1:
-  alt_test_name = \"${alt_test_name}\"
-  target_prim_test_name = \"${target_prim_test_name}\"
-  num_occurrences = \"${num_occurrences}\"
-Please correct and rerun."
-    fi
-
-  done
-#
-#-----------------------------------------------------------------------
-#
-# Sort in order of increasing test ID the arrays containing the names,
-# IDs, category subdirectories, and descriptions of the WE2E tests as
-# well as the arrays containing the experiment variable values for each
-# test.
-#
-# For this purpose, we first create an array (test_ids_and_inds) each
-# of whose elements consists of the test ID, the test type, and the index
-# of the array element (with a space used as delimiter).  The test type
-# is simply an identifier to distinguish between primary test names and
-# alternate (symlink-derived) ones.
For the former, we set the test
-# type to "A", and for the latter, we set it to "B".  We do this in order
-# to obtain a sorted result in which the elements are not only sorted by
-# test ID but also sorted by test type such that within each group of
-# elements/tests that has the same test ID, the primary test name is
-# listed first followed by zero or more alternate test names.
-#
-# Next, we sort the array test_ids_and_inds using the "sort" utility
-# and save the result in the new array test_ids_and_inds_sorted.  The
-# latter will be sorted according to test ID because that is the first
-# quantity on each line (element) of the original array test_ids_and_inds.
-# Also, as described above, for each group of test names that have the
-# same ID, the names will be sorted such that the primary test name is
-# listed first.
-#
-# Finally, we extract from test_ids_and_inds_sorted the second number
-# in each element (the one after the first number, which is the test ID,
-# and the test type, which we no longer need), which is the original
-# array index before sorting, and save the results in the array sort_inds.
-# This array will contain the original indices in sorted order that we
-# then use to sort the arrays containing the WE2E test names, IDs,
-# subdirectories, descriptions, and experiment variable values.
-#
-#-----------------------------------------------------------------------
-#
-  test_ids_and_inds=()
-  for (( i=0; i<=$((num_tests-1)); i++ )); do
-    test_type="A"
-    if [ "$i" -ge "${num_prim_tests}" ]; then
-      test_type="B"
-    fi
-    test_ids_and_inds[$i]="${test_ids[$i]} ${test_type} $i"
-  done
-
-  readarray -t "test_ids_and_inds_sorted" < \
-    <( printf "%s\n" "${test_ids_and_inds[@]}" | sort --numeric-sort )
-
-  sort_inds=()
-  regex_search="^[ ]*([0-9]*)[ ]*[AB][ ]*([0-9]*)$"
-  for (( i=0; i<=$((num_tests-1)); i++ )); do
-    sort_inds[$i]=$( printf "%s" "${test_ids_and_inds_sorted[$i]}" | \
-                     sed -n -r -e "s/${regex_search}/\2/p" )
-  done
-
-  local test_names_orig=( "${test_names[@]}" )
-  local test_subdirs_orig=( "${test_subdirs[@]}" )
-  local test_ids_orig=( "${test_ids[@]}" )
-  for (( i=0; i<=$((num_tests-1)); i++ )); do
-    ii="${sort_inds[$i]}"
-    test_names[$i]="${test_names_orig[$ii]}"
-    test_subdirs[$i]="${test_subdirs_orig[$ii]}"
-    test_ids[$i]="${test_ids_orig[$ii]}"
-  done
-
-  if [ "${get_test_descs}" = "TRUE" ]; then
-
-    local test_descs_orig=( "${test_descs[@]}" )
-    local num_fcsts_orig=( "${num_fcsts[@]}" )
-    local dt_atmos_orig=( "${dt_atmos[@]}" )
-    local rel_cost_orig=( "${rel_cost[@]}" )
-    for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-      cmd="local ${array_names_vars_to_extract[$k]}_orig=(\"\${${array_names_vars_to_extract[$k]}[@]}\")"
-      eval $cmd
-    done
-
-    for (( i=0; i<=$((num_tests-1)); i++ )); do
-      ii="${sort_inds[$i]}"
-      test_descs[$i]="${test_descs_orig[$ii]}"
-      num_fcsts[$i]="${num_fcsts_orig[$ii]}"
-      dt_atmos[$i]="${dt_atmos_orig[$ii]}"
-      rel_cost[$i]="${rel_cost_orig[$ii]}"
-      for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-        cmd="${array_names_vars_to_extract[$k]}[$i]=\"\${${array_names_vars_to_extract[$k]}_orig[$ii]}\""
-        eval $cmd
-      done
-    done
-
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# If generate_csv_file is set to "TRUE", generate a CSV (comma-separated
-# value) file containing information about the WE2E tests.  This file
-# can be opened in a spreadsheet in Google Sheets (and possibly Microsoft
-# Excel as well) to view information about all the WE2E tests.
Note that
-# in doing so, the user must specify the field delimiter to be the same
-# character that csv_delimiter is set to below.
-#
-#-----------------------------------------------------------------------
-#
-  if [ "${generate_csv_file}" = "TRUE" ]; then
-#
-# If a CSV file already exists, delete it.
-#
-    rm_vrfy -f "${csv_fp}"
-#
-# Set the character used to delimit columns in the CSV file.  This has
-# to be something that would normally not appear in the fields being
-# written to the CSV file.
-#
-    csv_delimiter="|"
-#
-# Set the titles of the columns that will be in the file.  Then write
-# them to the file.  The contents of the columns are described in more
-# detail further below.
-#
-    column_titles="\
-\"Test Name
-(Subdirectory)\" ${csv_delimiter} \
-\"Alternate Test Names
-(Subdirectories)\" ${csv_delimiter} \
-\"Test Purpose/Description\" ${csv_delimiter} \
-\"Relative Cost of Running Dynamics
-(1 corresponds to running a 6-hour forecast on the RRFS_CONUS_25km predefined grid using the default time step)\" ${csv_delimiter} \
-\"Number of Forecast Model Runs\""
-    for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-
-      crnt_title="${vars_to_extract[$k]}"
-      #
-      # Add units for select fields.
-      #
-      units=""
-      case "${vars_to_extract[$k]}" in
-        "INCR_CYCL_FREQ")
-          units="[hr]"
-          ;;
-        "FCST_LEN_HRS")
-          units="[hr]"
-          ;;
-        "LBC_SPEC_INTVL_HRS")
-          units="[hr]"
-          ;;
-      esac
-      crnt_title="${crnt_title}${units:+ $units}"
-
-      column_titles="${column_titles} ${csv_delimiter} \"${crnt_title}\""
-      #
-      # Insert a column for DT_ATMOS right after the one for FCST_LEN_HRS.
-      #
-      if [ "${vars_to_extract[$k]}" = "FCST_LEN_HRS" ]; then
-        units="[sec]"
-        crnt_title="DT_ATMOS${units:+ $units}"
-        column_titles="${column_titles} ${csv_delimiter} \"${crnt_title}\""
-      fi
-
-    done
-    printf "%s\n" "${column_titles}" >> "${csv_fp}"
-#
-# Loop through the arrays containing the WE2E test information.  Extract
-# the necessary information and record it to the CSV file row-by-row.
-# Note that each row corresponds to a primary test.  When an alternate
-# test is encountered, its information is stored in the row of the
-# corresponding primary test (i.e. a new row is not created).
-#
-    j=0
-    jp1=$((j+1))
-    while [ "$j" -lt "${num_tests}" ]; do
-#
-# Get the primary name of the test and the category subdirectory in which
-# it is located.
-#
-      prim_test_name_subdir="${test_names[$j]}"$'\n'"(${test_subdirs[$j]})"
-#
-# Get the test ID.
-#
-      test_id="${test_ids[$j]}"
-#
-# Get the test description.
-#
-      test_desc="${test_descs[$j]}"
-#
-# Replace any double-quotes in the test description with two double-quotes
-# since this is the way a double-quote is escaped in a CSV file, at least
-# a CSV file that is read in by Google Sheets.
-#
-      test_desc=$( printf "%s" "${test_desc}" | sed -r -e "s/\"/\"\"/g" )
-#
-# Get the time step.
-#
-      dta="${dt_atmos[$j]}"
-#
-# Get the relative cost.
-#
-      rc="${rel_cost[$j]}"
-#
-# Get the number of forecasts (number of times the forecast model is run).
-#
-      nf="${num_fcsts[$j]}"
-#
-# In the following inner while-loop, we step through all alternate test
-# names (if any) that follow the current primary name and construct a
-# string (alt_test_names_subdirs) consisting of all the alternate test
-# names for this primary name, with each followed by the subdirectory
-# the corresponding symlink is in.  Note that when the CSV file is opened
-# as a spreadsheet (e.g. in Google Sheets), this alternate test name
-# information all appears in one cell of the spreadsheet.
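#
# Using the "test_grid1"/"test_suite1" example from the header comments,
# a single row of the resulting file would look roughly like the following
# (the description, cost, and forecast count are made up; the embedded
# newlines keep each name/subdirectory pair in one spreadsheet cell):
#
#   "test_grid1
#   (grids)" | "test_suite1
#   (suites)" | "Test of the forecast model on grid1." | "'1.5" | "'1" | ...
#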
-#
-      alt_test_names_subdirs=""
-      while [ "$jp1" -lt "${num_tests}" ]; do
-        test_id_next="${test_ids[$jp1]}"
-        if [ "${test_id_next}" -eq "${test_id}" ]; then
-          alt_test_names_subdirs="${alt_test_names_subdirs}${test_names[$jp1]}"$'\n'"(${test_subdirs[$jp1]})"$'\n'
-          j="$jp1"
-          jp1=$((j+1))
-        else
-          break
-        fi
-      done
-# Remove trailing newline.
-      alt_test_names_subdirs="${alt_test_names_subdirs%$'\n'}"
-#
-# Write a line to the CSV file representing a single row of the spreadsheet.
-# This row contains the following columns:
-#
-# Column 1:
-# The primary test name followed by the category subdirectory it is
-# located in (the latter in parentheses).
-#
-# Column 2:
-# Any alternate test names followed by their category subdirectories (in
-# parentheses).  Each alternate test name and subdirectory pair is followed
-# by a newline, but all lines will appear in a single cell of the spreadsheet.
-#
-# Column 3:
-# The test description.
-#
-# Column 4:
-# The relative cost of running the dynamics in the test.  See above for
-# details.
-#
-# Column 5:
-# The number of times the forecast model will be run by the test.  This
-# is calculated using quantities such as the number of cycle dates (i.e.
-# forecast model start dates) and the number of ensemble members (which
-# is greater than 1 if running ensemble forecasts and 1 otherwise).  The
-# latter are in turn obtained directly or indirectly from the quantities
-# in Columns 6, 7, ....
-#
-# Columns 6, 7, ...:
-# The values of the experiment variables specified in vars_to_extract,
-# plus DT_ATMOS (included right after FCST_LEN_HRS).  Note that DT_ATMOS
-# cannot be included in vars_to_extract because it is usually not in the
-# WE2E test configuration file where this script looks for these variables
-# (because most of the tests use predefined grids, and for those cases,
-# DT_ATMOS is defined in the same file/script where the other grid
-# parameters are defined).
-#
-      row_content="\
-\"${prim_test_name_subdir}\" ${csv_delimiter} \
-\"${alt_test_names_subdirs}\" ${csv_delimiter} \
-\"${test_desc}\" ${csv_delimiter} \
-\"${rc}\" ${csv_delimiter} \
-\"${nf}\""
-
-      for (( k=0; k<=$((num_vars_to_extract-1)); k++ )); do
-
-        unset "val"
-        cmd="val=\"\${${array_names_vars_to_extract[$k]}[$j]}\""
-        eval $cmd
-        row_content="${row_content} ${csv_delimiter} \"${val}\""
-#
-# Insert value of DT_ATMOS right after value of FCST_LEN_HRS.
-#
-        if [ "${vars_to_extract[$k]}" = "FCST_LEN_HRS" ]; then
-          row_content="${row_content} ${csv_delimiter} \"${dta}\""
-        fi
-
-      done
-
-      printf "%s\n" "${row_content}" >> "${csv_fp}"
-#
-# Update loop indices.
-#
-      j="$jp1"
-      jp1=$((j+1))
-
-    done
-
-    print_info_msg "\
-Successfully generated a CSV (Comma Separated Value) file (csv_fp)
-containing information on all WE2E tests:
-  csv_fp = \"${csv_fp}\""
-
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Use the eval function to set this function's output variables.  Note
-# that each of these is set only if the corresponding input variable
-# specifying the name to use for the output variable is not empty.
-#
-#-----------------------------------------------------------------------
-#
-  if [ ! -z "${outvarname_test_configs_basedir}" ]; then
-    eval ${outvarname_test_configs_basedir}="${test_configs_basedir}"
-  fi
-
-  if [ ! -z "${outvarname_test_names}" ]; then
-    test_names_str="( "$( printf "\"%s\" " "${test_names[@]}" )")"
-    eval ${outvarname_test_names}="${test_names_str}"
-  fi
-
-  if [ ! -z "${outvarname_test_subdirs}" ]; then
-z "${outvarname_test_subdirs}" ]; then - test_subdirs_str="( "$( printf "\"%s\" " "${test_subdirs[@]}" )")" - eval ${outvarname_test_subdirs}="${test_subdirs_str}" - fi - - if [ ! -z "${outvarname_test_ids}" ]; then - test_ids_str="( "$( printf "\"%s\" " "${test_ids[@]}" )")" - eval ${outvarname_test_ids}="${test_ids_str}" - fi - - if [ ! -z "${outvarname_test_descs}" ]; then - test_descs_str="( "$( printf "'%s' " "${test_descs[@]}" )")" - eval ${output_varname_test_descs}="${test_descs_str}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script or -# function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - diff --git a/tests/WE2E/get_expts_status.sh b/tests/WE2E/get_expts_status.sh deleted file mode 100755 index de326589ca..0000000000 --- a/tests/WE2E/get_expts_status.sh +++ /dev/null @@ -1,475 +0,0 @@ -#!/bin/bash - -# -#----------------------------------------------------------------------- -# -# This script updates and reports back the workflow status of all active -# forecast experiments under a specified base directory (expts_basedir). -# It must be supplied exactly one argument, which is the full path to the -# experiments base directory. -# -# The script first determines which of the subdirectories under the base -# directory represent active experiments (see below for how this is done). -# For all such experiments, it calls the workflow (re)launch script to -# update the status of the workflow and prints the status out to screen. -# It also generates a status report file in the base directory that -# contains the last num_log_lines lines (defined below) of each experiment's -# workflow log file [which is generated by the (re)launch script] and thus -# has information on which tasks may have succeeded/failed]. -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Do not allow uninitialized variables. -# -#----------------------------------------------------------------------- -# -set -u -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# -scrfunc_fp=$( readlink -f "${BASH_SOURCE[0]}" ) -scrfunc_fn=$( basename "${scrfunc_fp}" ) -scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# The current script should be located in the "tests" subdirectory of the -# workflow's top-level directory, which we denote by HOMEdir. Thus, -# HOMEdir is the directory one level above the directory in which the -# current script is located. Set HOMEdir accordingly. -# -#----------------------------------------------------------------------- -# -HOMEdir=${scrfunc_dir%/*/*} -# -#----------------------------------------------------------------------- -# -# Set directories. -# -#----------------------------------------------------------------------- -# -USHdir="$HOMEdir/ush" -# -#----------------------------------------------------------------------- -# -# Source bash utility functions. 
-#
-#-----------------------------------------------------------------------
-#
-. $USHdir/source_util_funcs.sh
-#
-#-----------------------------------------------------------------------
-#
-# Set the usage message.
-#
-#-----------------------------------------------------------------------
-#
-usage_str="\
-Usage:
-
-  ${scrfunc_fn} \\
-    expts_basedir=\"...\" \\
-    [launch_wflows=\"...\"] \\
-    [num_log_lines=\"...\"] \\
-    [verbose=\"...\"]
-
-The arguments in brackets are optional.  The arguments are defined as
-follows:
-
-expts_basedir:
-Full path to the experiments base directory, i.e. the directory containing
-the experiment subdirectories.
-
-launch_wflows:
-Optional flag that determines whether each experiment's workflow should
-be launched if it hasn't been already.  Should be set to \"TRUE\" or
-\"FALSE\".  Default is \"FALSE\".
-
-num_log_lines:
-Optional integer specifying the number of lines from the end of the
-workflow launch log file (log.launch_FV3LAM_wflow) of each test to
-include in the status report file that this script generates.
-
-verbose:
-Optional verbosity flag.  Should be set to \"TRUE\" or \"FALSE\".  Default
-is \"FALSE\".
-"
-#
-#-----------------------------------------------------------------------
-#
-# Check to see if usage help for this script is being requested.  If so,
-# print it out and exit with a 0 exit code (success).
-#
-#-----------------------------------------------------------------------
-#
-help_flag="--help"
-if [ "$#" -eq 1 ] && [ "$1" = "${help_flag}" ]; then
-  print_info_msg "${usage_str}"
-  exit 0
-fi
-#
-#-----------------------------------------------------------------------
-#
-# Specify the set of valid argument names for this script or function.
-# Then process the arguments provided to it on the command line (which
-# should consist of a set of name-value pairs of the form arg1="value1",
-# arg2="value2", etc).
-#
-#-----------------------------------------------------------------------
-#
-valid_args=( \
-  "expts_basedir" \
-  "launch_wflows" \
-  "num_log_lines" \
-  "verbose" \
-  )
-process_args valid_args "$@"
-#
-#-----------------------------------------------------------------------
-#
-# Default values for various input arguments.
-#
-#-----------------------------------------------------------------------
-#
-launch_wflows=${launch_wflows:-"FALSE"}
-num_log_lines=${num_log_lines:-"40"}
-verbose=${verbose:-"FALSE"}
-#
-#-----------------------------------------------------------------------
-#
-# Make sure "launch_wflows" and "verbose" have valid values.
-#
-#-----------------------------------------------------------------------
-#
-launch_wflows=$(boolify "${launch_wflows}")
-verbose=$(boolify "$verbose")
-#
-#-----------------------------------------------------------------------
-#
-# Verify that the required arguments to this script have been specified.
-# If not, print out an error message and exit.
-#
-#-----------------------------------------------------------------------
-#
-help_msg="\
-Use
-  ${scrfunc_fn} ${help_flag}
-to get help on how to use this script."
-
-if [ -z "${expts_basedir}" ]; then
-  print_err_msg_exit "\
-The argument \"expts_basedir\" specifying the base directory containing
-the experiment directories was not specified in the call to this script.  \
-${help_msg}"
-fi
-#
-#-----------------------------------------------------------------------
-#
-# Check that the specified experiments base directory exists and is
-# actually a directory.  If not, print out an error message and exit.
-#
-#-----------------------------------------------------------------------
-#
-if [ ! -d "${expts_basedir}" ]; then
-  print_err_msg_exit "
-The specified experiments base directory (expts_basedir) does not exist
-or is not actually a directory:
-  expts_basedir = \"${expts_basedir}\""
-fi
-#
-#-----------------------------------------------------------------------
-#
-# Create an array containing the names of the subdirectories in the
-# experiment base directory.
-#
-#-----------------------------------------------------------------------
-#
-cd_vrfy "${expts_basedir}"
-#
-# Get a list of all subdirectories (but not files) in the experiment base
-# directory.  Note that the ls command below will return a string containing
-# the subdirectory names, with each name followed by a slash and a
-# newline.
-#
-subdirs_list=$( \ls -1 -d */ )
-#
-# Remove all slashes from the ends of the subdirectory names.
-#
-subdirs_list=$( printf "${subdirs_list}" "%s" | sed -r 's|/||g' )
-#
-# Create an array out of the string containing the newline-separated list
-# of subdirectories.
-#
-subdirs_list=( ${subdirs_list} )
-#
-#-----------------------------------------------------------------------
-#
-# Loop through the elements of the array subdirs_list and create an array
-# containing a list of all active experiment subdirectories under the
-# experiment base directory.  These active subdirectories will be further
-# processed later below.  Here, by "active" experiment subdirectory, we
-# mean a subdirectory that (1) contains a forecast experiment (i.e. was
-# created by the experiment generation scripts) and (2) does not
-# represent an old experiment whose workflow status is no longer relevant.
-# For this purpose, for each element in subdirs_list, we:
-#
-# 1) Change location to the subdirectory.
-#
-# 2) Check whether an experiment variable definitions file (var_defns.sh)
-#    exists.  If so, we assume the subdirectory is an experiment directory.
-#    If not, we assume it is not, in which case the subdirectory will
-#    not be added to the list of active experiment subdirectories.
-#
-# 3) If the subdirectory is an experiment directory, ensure that it is
-#    an active experiment, i.e. that it is not an old experiment that
-#    has been renamed and whose experiment status is thus irrelevant.
-#    For this purpose, we source the variable definitions file in order
-#    to have available the workflow variable EXPT_SUBDIR that contains
-#    the name of the experiment when it was first created.  If this
-#    matches the name of the current subdirectory, then add the latter
-#    to the list of active experiment subdirectories; otherwise, do not.
-#    In the latter case, we are assuming that the original experiment
-#    subdirectory was renamed (e.g. to something like the original name
-#    with the string "_old001" appended) and thus does not contain an
-#    active experiment whose workflow status is of interest.
-#
-# 4) Change location back to the experiments base directory.
-#
-#-----------------------------------------------------------------------
-#
-separator="======================================"
-
-var_defns_fn="var_defns.sh"
-j="0"
-expt_subdirs=()
-
-print_info_msg "\
-Checking for active experiment directories in the specified experiments
-base directory (expts_basedir):
-  expts_basedir = \"${expts_basedir}\"
-..."
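The four-step rule above reduces to very little code. As an illustration only (find_active_experiments is hypothetical, not part of this patch, and it assumes var_defns.sh assigns EXPT_SUBDIR="..." on a line of its own), the same check in Python is:

    import os
    import re

    # Illustrative sketch; hypothetical helper, not part of the SRW App code.
    def find_active_experiments(expts_basedir: str) -> list:
        """Subdirectories whose var_defns.sh EXPT_SUBDIR still matches the directory name."""
        active = []
        for subdir in sorted(os.listdir(expts_basedir)):
            var_defns = os.path.join(expts_basedir, subdir, "var_defns.sh")
            if not os.path.isfile(var_defns):
                continue  # step 2: not an experiment directory
            with open(var_defns, encoding="utf-8") as f:
                match = re.search(r'^EXPT_SUBDIR="?([^"\n]+)', f.read(), re.MULTILINE)
            # step 3: a renamed (old) experiment no longer matches its original name
            if match and match.group(1) == subdir:
                active.append(subdir)
        return active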
-
-num_subdirs="${#subdirs_list[@]}"
-for (( i=0; i<=$((num_subdirs-1)); i++ )); do
-
-  subdir="${subdirs_list[$i]}"
-  msg="
-$separator
-Checking whether the subdirectory
-  \"${subdir}\"
-contains an active experiment..."
-  print_info_msg "$verbose" "$msg"
-
-  cd_vrfy "${subdir}"
-#
-# If a variable definitions file does not exist, print out a message
-# and move on to the next subdirectory.
-#
-  if [ ! -f "${var_defns_fn}" ]; then
-
-    print_info_msg "$verbose" "
-The current subdirectory (subdir) under the experiments base directory
-(expts_basedir) does not contain an experiment variable definitions file
-(var_defns_fn):
-  expts_basedir = \"${expts_basedir}\"
-  subdir = \"${subdir}\"
-  var_defns_fn = \"${var_defns_fn}\"
-Thus, we will assume it is not an experiment directory and will not add
-it to the list of active experiment subdirectories whose workflow status
-must be checked."
-#
-# If a variable definitions file does exist, then...
-#
-  else
-#
-# Source the variable definitions file.
-#
-    . "./${var_defns_fn}"
-# We want a clean output from this script so disable debugging mode
-    export DEBUG="FALSE"
-#
-# If the workflow variable EXPT_SUBDIR is the same as the name of the
-# current subdirectory, then assume this subdirectory contains an active
-# experiment.  In this case, print out a message and add its name to the
-# list of such experiments.
-#
-    if [ "${EXPT_SUBDIR}" = "$subdir" ]; then
-
-      print_info_msg "$verbose" "
-The current subdirectory (subdir) under the experiments base directory
-(expts_basedir) contains an active experiment:
-  expts_basedir = \"${expts_basedir}\"
-  subdir = \"${subdir}\"
-Adding the current subdirectory to the list of active experiment
-subdirectories whose workflow status must be checked."
-
-      expt_subdirs[$j]="$subdir"
-      j=$((j+1))
-#
-# If the workflow variable EXPT_SUBDIR is not the same as the name of
-# the current subdirectory, then assume this subdirectory contains an
-# "inactive" experiment that has been renamed.  In this case, print out
-# a message and move on to the next subdirectory (without adding the
-# name of the current subdirectory to the list of active experiments).
-#
-    else
-
-      print_info_msg "$verbose" "
-The current subdirectory (subdir) under the experiments base directory
-(expts_basedir) contains an experiment whose original name (EXPT_SUBDIR)
-does not match the name of the current subdirectory:
-  expts_basedir = \"${expts_basedir}\"
-  subdir = \"${subdir}\"
-  EXPT_SUBDIR = \"${EXPT_SUBDIR}\"
-Thus, we will assume that the current subdirectory contains an inactive
-(i.e. old) experiment whose workflow status is not relevant and will not
-add it to the list of active experiment subdirectories whose workflow
-status must be checked."
-
-    fi
-
-  fi
-
-  print_info_msg "$verbose" "\
-$separator
-"
-#
-# Change location back to the experiments base directory.
-#
-  cd_vrfy "${expts_basedir}"
-
-done
-#
-#-----------------------------------------------------------------------
-#
-# Get the number of active experiments for which to check the workflow
-# status and print out an informational message.
-# -#----------------------------------------------------------------------- -# -num_expts="${#expt_subdirs[@]}" -expt_subdirs_str=$( printf " \'%s\'\n" "${expt_subdirs[@]}" ) -print_info_msg " -The number of active experiments found is: - num_expts = ${num_expts} -The list of experiments whose workflow status will be checked is: -${expt_subdirs_str} -" -# -#----------------------------------------------------------------------- -# -# Set the name and full path of the file in which the status report will -# be saved. If such a file already exists, rename it. -# -#----------------------------------------------------------------------- -# -yyyymmddhhmn=$( date +%Y%m%d%H%M ) -expts_status_fn="expts_status_${yyyymmddhhmn}.txt" -expts_status_fp="${expts_basedir}/${expts_status_fn}" - -# Note that the check_for_preexist_dir_file function assumes that there -# is a variable named "VERBOSE" in the environment. Set that before -# calling the function. -VERBOSE="TRUE" -check_for_preexist_dir_file "${expts_status_fp}" "rename" -# -#----------------------------------------------------------------------- -# -# Loop through the elements of the array expt_subdirs. For each element -# (i.e. for each active experiment), change location to the experiment -# directory and call the script launch_FV3LAM_wflow.sh to update the log -# file log.launch_FV3LAM_wflow. Then take the last num_log_lines of -# this log file (along with an appropriate message) and add it to the -# status report file. -# -#----------------------------------------------------------------------- -# -launch_wflow_fn="launch_FV3LAM_wflow.sh" -launch_wflow_log_fn="log.launch_FV3LAM_wflow" - -for (( i=0; i<=$((num_expts-1)); i++ )); do - - expt_subdir="${expt_subdirs[$i]}" - msg="\ -$separator -Checking workflow status of experiment \"${expt_subdir}\" ..." - print_info_msg "$msg" - print_info_msg "$msg" >> "${expts_status_fp}" -# -# Change location to the experiment subdirectory, and check the launch -# log file for status -# - cd_vrfy "${expt_subdir}" - if [ -f "${launch_wflow_log_fn}" ]; then - # - # Print the workflow status to the screen. - # - # The "tail -1" is to get only the last occurrence of "Workflow status" - wflow_status=$( grep "Workflow status:" "${launch_wflow_log_fn}" | tail -1 ) - # Not sure why this doesn't work to strip leading spaces. - # wflow_status="${wflow_status## }" - # Remove leading spaces. - wflow_status=$( printf "${wflow_status}" "%s" | sed -r 's|^[ ]*||g' ) - print_info_msg "${wflow_status}" - print_info_msg "\ -$separator -" - # - # Combine message above with the last num_log_lines lines from the workflow - # launch log file and place the result in the status report file. - # - msg=$msg" -${wflow_status} -The last ${num_log_lines} lines of the workflow launch log file -(\"${launch_wflow_log_fn}\") are: -" - tail -n ${num_log_lines} ${launch_wflow_log_fn} >> "${expts_status_fp}" -# -# If a log file from the launch script is not present in the experiment -# directory, it means the workflow has not been launched. In this case, -# print out an appropriate message. Then, if launch_wflows is set to -# TRUE, launch the workflow and print out further info. -# - else - - wflow_status="Workflow status: NOT LAUNCHED YET" - if [ "${launch_wflows}" = "TRUE" ]; then - wflow_status=${wflow_status}" -Launching workflow using script \"${launch_wflow_fn}\"..." 
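The grep | tail -1 | sed pipeline above, together with the tail -n excerpt written to the status report, is exactly the kind of logic the Python rewrite absorbs. As an illustration only (workflow_status is a hypothetical helper, not part of this patch):

    # Illustrative sketch; hypothetical helper, not part of the SRW App code.
    def workflow_status(log_path: str, num_log_lines: int = 40):
        """Last 'Workflow status:' line of a launch log, plus the log's tail."""
        with open(log_path, encoding="utf-8") as f:
            lines = f.read().splitlines()
        # grep "Workflow status:" | tail -1, with leading spaces stripped
        status = next((line.strip() for line in reversed(lines)
                       if "Workflow status:" in line),
                      "Workflow status: NOT LAUNCHED YET")
        return status, lines[-num_log_lines:]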
- fi - - print_info_msg "${wflow_status}" - print_info_msg "\ -$separator -" - - msg="${wflow_status} -" - print_info_msg "$msg" >> "${expts_status_fp}" - if [ "${launch_wflows}" = "TRUE" ]; then - ./${launch_wflow_fn} >> "${expts_status_fp}" 2>&1 - fi - - fi -# -# Change location back to the experiments base directory. -# - cd_vrfy "${expts_basedir}" - -done - -print_info_msg "\ -A status report has been created in: - expts_status_fp = \"${expts_status_fp}\" - -DONE." diff --git a/tests/WE2E/monitor_jobs.py b/tests/WE2E/monitor_jobs.py index 8fbd4f2afb..5d1d4a63af 100755 --- a/tests/WE2E/monitor_jobs.py +++ b/tests/WE2E/monitor_jobs.py @@ -3,217 +3,99 @@ import sys import argparse import logging -import subprocess -import sqlite3 import time from textwrap import dedent from datetime import datetime -from contextlib import closing sys.path.append("../../ush") -from python_utils import ( - load_config_file, - cfg_to_yaml_str -) +from python_utils import load_config_file from check_python_version import check_python_version +from utils import calculate_core_hours, write_monitor_file, update_expt_status,\ + update_expt_status_parallel, print_WE2E_summary -def monitor_jobs(expt_dict: dict, monitor_file: str = '', debug: bool = False) -> str: +def monitor_jobs(expts_dict: dict, monitor_file: str = '', procs: int = 1, debug: bool = False) -> str: """Function to monitor and run jobs for the specified experiment using Rocoto Args: - expt_dict (dict): A dictionary containing the information needed to run + expts_dict (dict): A dictionary containing the information needed to run one or more experiments. See example file monitor_jobs.yaml monitor_file (str): [optional] debug (bool): [optional] Enable extra output for debugging Returns: str: The name of the file used for job monitoring (when script is finished, this contains results/summary) - """ starttime = datetime.now() # Write monitor_file, which will contain information on each monitored experiment if not monitor_file: - monitor_file = f'monitor_jobs_{starttime.strftime("%Y%m%d%H%M%S")}.yaml' - logging.info(f"Writing information for all experiments to {monitor_file}") + monitor_file = f'WE2E_tests_{starttime.strftime("%Y%m%d%H%M%S")}.yaml' + logging.info(f"Writing information for all experiments to {monitor_file}") - write_monitor_file(monitor_file,expt_dict) + write_monitor_file(monitor_file,expts_dict) # Perform initial setup for each experiment logging.info("Checking tests available for monitoring...") - for expt in expt_dict: - logging.info(f"Starting experiment {expt} running") - expt_dict[expt] = update_expt_status(expt_dict[expt], expt, True) - write_monitor_file(monitor_file,expt_dict) + if procs > 1: + print(f'Starting experiments in parallel with {procs} processes') + expts_dict = update_expt_status_parallel(expts_dict, procs, True, debug) + else: + for expt in expts_dict: + logging.info(f"Starting experiment {expt} running") + expts_dict[expt] = update_expt_status(expts_dict[expt], expt, True, debug) + + write_monitor_file(monitor_file,expts_dict) - logging.info(f'Setup complete; monitoring {len(expt_dict)} experiments') + logging.info(f'Setup complete; monitoring {len(expts_dict)} experiments') + logging.info('Use ctrl-c to pause job submission/monitoring') #Make a copy of experiment dictionary; will use this copy to monitor active experiments - running_expts = expt_dict.copy() + running_expts = expts_dict.copy() i = 0 while running_expts: i += 1 + if procs > 1: + expts_dict = update_expt_status_parallel(expts_dict, procs) + else: + 
for expt in running_expts.copy():
+                expts_dict[expt] = update_expt_status(expts_dict[expt], expt)
+
         for expt in running_expts.copy():
-            expt_dict[expt] = update_expt_status(expt_dict[expt], expt)
-            running_expts[expt] = expt_dict[expt]
-            if running_expts[expt]["status"] in ['DEAD','ERROR','COMPLETE']:
-                logging.info(f'Experiment {expt} is {running_expts[expt]["status"]}; will no longer monitor.')
+            running_expts[expt] = expts_dict[expt]
+            if running_expts[expt]["status"] in ['DEAD','ERROR','COMPLETE']:
+                logging.info(f'Experiment {expt} is {running_expts[expt]["status"]}; '\
+                             'will no longer monitor.')
                 running_expts.pop(expt)
                 continue
-            logging.debug(f'Experiment {expt} status is {expt_dict[expt]["status"]}')
-
+            logging.debug(f'Experiment {expt} status is {expts_dict[expt]["status"]}')
 
-        write_monitor_file(monitor_file,expt_dict)
+        write_monitor_file(monitor_file,expts_dict)
 
         endtime = datetime.now()
         total_walltime = endtime - starttime
 
         logging.debug(f"Finished loop {i}\nWalltime so far is {str(total_walltime)}")
-
         #Slow things down just a tad between loops so experiments behave better
         time.sleep(5)
 
-
     endtime = datetime.now()
     total_walltime = endtime - starttime
 
-    logging.info(f'All {len(expt_dict)} experiments finished in {str(total_walltime)}')
-
-    return monitor_file
-
-def update_expt_status(expt: dict, name: str, refresh: bool = False) -> dict:
-    """
-    This function reads the dictionary showing the location of a given experiment, runs a
-    `rocotorun` command to update the experiment (running new jobs and updating the status of
-    previously submitted ones), and reads the rocoto database file to update the status of
-    each job for that experiment in the experiment dictionary.
-
-    The function then uses a simple set of rules to combine the statuses of every task
-    into a useful "status" for the whole experiment, and returns the updated experiment dictionary.
-
-    Experiment "status" levels explained:
-    CREATED: The experiments have been created, but the monitor script has not yet processed them.
-             This is immediately overwritten at the beginning of the "monitor_jobs" function, so we
-             should never see this status in this function. Including just for completeness' sake.
-    SUBMITTING: All jobs are in status SUBMITTING or SUCCEEDED. This is a normal state; we will
-             continue to monitor this experiment.
-    DYING: One or more tasks have died (status "DEAD"), so this experiment has had an error.
-           We will continue to monitor this experiment until all tasks are either status DEAD or
-           status SUCCEEDED (see next entry).
-    DEAD: One or more tasks are at status DEAD, and the rest are either DEAD or SUCCEEDED. We
-          will no longer monitor this experiment.
-    ERROR: One or more tasks are at status UNKNOWN, meaning that rocoto has failed to track the
-          job associated with that task. This will require manual intervention to solve, so we
-          will no longer monitor this experiment.
-          This status may also appear if we fail to read the rocoto database file.
-    RUNNING: One or more jobs are at status RUNNING, and the rest are either status QUEUED, SUBMITTED,
-          or SUCCEEDED. This is a normal state; we will continue to monitor this experiment.
-    QUEUED: One or more jobs are at status QUEUED, and some others may be at status SUBMITTED or
-          SUCCEEDED.
-          This is a normal state; we will continue to monitor this experiment.
-    SUCCEEDED: All jobs are status SUCCEEDED; we will monitor for one more cycle in case there are
-          unsubmitted jobs remaining.
- COMPLETE:All jobs are status SUCCEEDED, and we have monitored this job for an additional cycle - to ensure there are no un-submitted jobs. We will no longer monitor this experiment. - - Args: - expt (dict): A dictionary containing the information for an individual experiment, as - described in the main monitor_jobs() function. - name (str): Name of the experiment; used for logging only - refresh (bool): If true, this flag will check an experiment status even if it is listed - as DEAD, ERROR, or COMPLETE. Used for initial checks for experiments - that may have been restarted. - Returns: - dict: The updated experiment dictionary. - """ - - #If we are no longer tracking this experiment, return unchanged - if (expt["status"] in ['DEAD','ERROR','COMPLETE']) and not refresh: - return expt - - # Update experiment, read rocoto database - rocoto_db = f"{expt['expt_dir']}/FV3LAM_wflow.db" - rocotorun_cmd = ["rocotorun", f"-w {expt['expt_dir']}/FV3LAM_wflow.xml", f"-d {rocoto_db}"] - subprocess.run(rocotorun_cmd) - - logging.debug(f"Reading database for experiment {name}, updating experiment dictionary") - try: - # This section of code queries the "job" table of the rocoto database, returning a list - # of tuples containing the taskname, cycle, and state of each job respectively - with closing(sqlite3.connect(rocoto_db)) as connection: - with closing(connection.cursor()) as cur: - db = cur.execute('SELECT taskname,cycle,state from jobs').fetchall() - except: - logging.warning(f"Unable to read database {rocoto_db}\nCan not track experiment {name}") - expt["status"] = "ERROR" - return expt - - for task in db: - # For each entry from rocoto database, store that under a dictionary key named TASKNAME_CYCLE - # Cycle comes from the database in Unix Time (seconds), so convert to human-readable - cycle = datetime.utcfromtimestamp(task[1]).strftime('%Y%m%d%H%M') - expt[f"{task[0]}_{cycle}"] = task[2] - - #Run rocotorun again to get around rocotobqserver proliferation issue - subprocess.run(rocotorun_cmd) - - statuses = list() - for task in expt: - # Skip non-task entries - if task in ["expt_dir","status"]: - continue - statuses.append(expt[task]) - - if "DEAD" in statuses: - still_live = ["RUNNING", "SUBMITTING", "QUEUED"] - if any(status in still_live for status in statuses): - logging.debug(f'DEAD job in experiment {name}; continuing to track until all jobs are complete') - expt["status"] = "DYING" - else: - expt["status"] = "DEAD" - return expt - - if "UNKNOWN" in statuses: - expt["status"] = "ERROR" - - if "RUNNING" in statuses: - expt["status"] = "RUNNING" - elif "QUEUED" in statuses: - expt["status"] = "QUEUED" - elif "SUBMITTING" in statuses: - expt["status"] = "SUBMITTING" - elif "SUCCEEDED" in statuses: - if expt["status"] == "SUCCEEDED": - expt["status"] = "COMPLETE" - else: - expt["status"] = "SUCCEEDED" - else: - logging.fatal("Some kind of horrible thing has happened") - raise ValueError(dedent(f"""Some kind of horrible thing has happened to the experiment status - for experiment {name} - status is {expt["status"]} - all task statuses are {statuses}""")) + logging.info(f'All {len(expts_dict)} experiments finished in {str(total_walltime)}') + logging.info('Calculating core-hour usage and printing final summary') - return expt + # Calculate core hours and update yaml + expts_dict = calculate_core_hours(expts_dict) + write_monitor_file(monitor_file,expts_dict) + #Call function to print summary + print_WE2E_summary(expts_dict, debug) -def write_monitor_file(monitor_file: str, expt_dict: dict): - 
try: - with open(monitor_file,"w") as f: - f.write("### WARNING ###\n") - f.write("### THIS FILE IS AUTO_GENERATED AND REGULARLY OVER-WRITTEN BY monitor_jobs.py\n") - f.write("### EDITS MAY RESULT IN MISBEHAVIOR OF EXPERIMENTS RUNNING\n") - f.writelines(cfg_to_yaml_str(expt_dict)) - except: - logging.fatal("\n********************************\n") - logging.fatal(f"WARNING WARNING WARNING\nFailure occurred while writing monitor file {monitor_file}") - logging.fatal("File may be corrupt or invalid for re-run!!") - logging.fatal("\n********************************\n") - raise + return monitor_file def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> None: @@ -225,7 +107,7 @@ def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> N formatter = logging.Formatter("%(name)-16s %(levelname)-8s %(message)s") - fh = logging.FileHandler(logfile, mode='w') + fh = logging.FileHandler(logfile, mode='a') fh.setLevel(logging.DEBUG) fh.setFormatter(formatter) logging.getLogger().addHandler(fh) @@ -247,21 +129,35 @@ def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> N logfile='log.monitor_jobs' #Parse arguments - parser = argparse.ArgumentParser(description="Script for monitoring and running jobs in a specified experiment, as specified in a yaml configuration file\n") - - parser.add_argument('-y', '--yaml_file', type=str, help='YAML-format file specifying the information of jobs to be run; for an example file, see monitor_jobs.yaml', required=True) - parser.add_argument('-d', '--debug', action='store_true', help='Script will be run in debug mode with more verbose output') + parser = argparse.ArgumentParser(description="Script for monitoring and running jobs in a "\ + "specified experiment, as specified in a yaml "\ + "configuration file\n") + + parser.add_argument('-y', '--yaml_file', type=str, + help='YAML-format file specifying the information of jobs to be run; '\ + 'for an example file, see monitor_jobs.yaml', required=True) + parser.add_argument('-p', '--procs', type=int, + help='Run resource-heavy tasks (such as calls to rocotorun) in parallel, '\ + 'with provided number of parallel tasks', default=1) + parser.add_argument('-d', '--debug', action='store_true', + help='Script will be run in debug mode with more verbose output') args = parser.parse_args() setup_logging(logfile,args.debug) - expt_dict = load_config_file(args.yaml_file) + expts_dict = load_config_file(args.yaml_file) + + if args.procs < 1: + raise ValueError('You can not have less than one parallel process; select a valid value for --procs') #Call main function try: - monitor_jobs(expt_dict,args.yaml_file, args.debug) + monitor_jobs(expts_dict,args.yaml_file,args.procs,args.debug) + except KeyboardInterrupt: + logging.info("\n\nUser interrupted monitor script; to resume monitoring jobs run:\n") + logging.info(f"{__file__} -y={args.yaml_file} -p={args.procs}\n") except: logging.exception( dedent( diff --git a/tests/WE2E/print_test_info.py b/tests/WE2E/print_test_info.py new file mode 100755 index 0000000000..f2301bb690 --- /dev/null +++ b/tests/WE2E/print_test_info.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 + +import argparse +import sys + +from utils import print_test_info + +sys.path.append("../../ush") + +if __name__ == "__main__": + + #Parse arguments + parser = argparse.ArgumentParser( + description="Script for parsing all test files in the test_configs/ "\ + "directory, and printing a pipe-delimited summary file of the details of "\ + "each test.\n") + + 
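+    # With no -o argument, print_test_info() falls back to its default output
+    # file, WE2E_test_info.txt.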
parser.add_argument('-o', '--output_file', type=str,
+                        help='File name for test details file', default='')
+
+    args = parser.parse_args()
+
+    if args.output_file:
+        print_test_info(args.output_file)
+    else:
+        print_test_info()
diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py
index 66df2e8205..b1ef55c9ed 100755
--- a/tests/WE2E/run_WE2E_tests.py
+++ b/tests/WE2E/run_WE2E_tests.py
@@ -6,6 +6,7 @@
 import argparse
 import logging
 from textwrap import dedent
+from datetime import datetime
 
 sys.path.append("../../ush")
 
@@ -17,8 +18,8 @@
 
 from check_python_version import check_python_version
 
-from monitor_jobs import monitor_jobs
-
+from monitor_jobs import monitor_jobs, write_monitor_file
+from utils import print_test_info
 
 def run_we2e_tests(homedir, args) -> None:
     """Function to run the WE2E tests selected by the user
@@ -64,11 +65,13 @@ def run_we2e_tests(homedir, args) -> None:
         tests_to_check = []
         for f in alltests:
             filename = os.path.basename(f)
-            # We just want the test namein this list, so cut out the "config." prefix and ".yaml" extension
+            # We just want the test name in this list, so cut out the
+            # "config." prefix and ".yaml" extension
             tests_to_check.append(filename[7:-5])
         logging.debug(f"Will check all tests:\n{tests_to_check}")
     elif user_spec_tests[0] in ['fundamental', 'comprehensive']:
-        # I am writing this section of code under protest; we should use args.run_envir to check for run_envir-specific files!
+        # I am writing this section of code under protest; we should use args.run_envir to
+        # check for run_envir-specific files!
         prefix = f"machine_suites/{user_spec_tests[0]}"
         testfilename = f"{prefix}.{machine}.{args.compiler}.nco"
         if not os.path.isfile(testfilename):
@@ -82,27 +85,31 @@ def run_we2e_tests(homedir, args) -> None:
         else:
             if not run_envir:
                 run_envir = 'community'
-                logging.debug(f'{testfilename} exists for this platform and run_envir has not been specified'\
+                logging.debug(f'{testfilename} exists for this platform and run_envir'\
+                              ' has not been specified\n'\
                               f'Setting run_envir = {run_envir} for all tests')
         else:
             if not run_envir:
                 run_envir = 'nco'
-                logging.debug(f'{testfilename} exists for this platform and run_envir has not been specified'\
+                logging.debug(f'{testfilename} exists for this platform and run_envir has'\
+                              ' not been specified\n'\
                               f'Setting run_envir = {run_envir} for all tests')
 
         logging.debug(f"Reading test file: {testfilename}")
-        with open(testfilename) as f:
+        with open(testfilename, encoding="utf-8") as f:
            tests_to_check = [x.rstrip() for x in f]
         logging.debug(f"Will check {user_spec_tests[0]} tests:\n{tests_to_check}")
     else:
-        # If we have gotten this far then the only option left for user_spec_tests is a file containing test names
+        # If we have gotten this far then the only option left for user_spec_tests is a
+        # file containing test names
         logging.debug(f'Checking if {user_spec_tests} is a file containing test names')
         if os.path.isfile(user_spec_tests[0]):
-            with open(user_spec_tests[0]) as f:
+            with open(user_spec_tests[0], encoding="utf-8") as f:
                 tests_to_check = [x.rstrip() for x in f]
         else:
             raise FileNotFoundError(dedent(f"""
             The specified 'tests' argument '{user_spec_tests}'
-            does not appear to be a valid test name, a valid test suite, or a file containing valid test names.
+            does not appear to be a valid test name, a valid test suite, or a file
+            containing valid test names.
             Check your inputs and try again.
""")) @@ -143,6 +150,10 @@ def run_we2e_tests(homedir, args) -> None: if 'nco' not in test_cfg: test_cfg['nco'] = dict() test_cfg['nco'].update({"model_ver": "we2e"}) + if args.opsroot: + if 'nco' not in test_cfg: + test_cfg['nco'] = dict() + test_cfg['nco'].update({"OPSROOT": args.opsroot}) # if platform section was not in input config, initialize as empty dict if 'platform' not in test_cfg: test_cfg['platform'] = dict() @@ -162,32 +173,32 @@ def run_we2e_tests(homedir, args) -> None: if args.verbose_tests: test_cfg['workflow'].update({"VERBOSE": args.verbose_tests}) + logging.debug(f"Overwriting WE2E-test-specific settings for test \n{test_name}\n") if 'task_get_extrn_ics' in test_cfg: - logging.debug(test_cfg['task_get_extrn_ics']) - test_cfg['task_get_extrn_ics'] = check_task_get_extrn_ics(test_cfg,machine_defaults,config_defaults) - logging.debug(test_cfg['task_get_extrn_ics']) + test_cfg['task_get_extrn_ics'] = check_task_get_extrn_bcs(test_cfg,machine_defaults, + config_defaults,"ics") if 'task_get_extrn_lbcs' in test_cfg: - logging.debug(test_cfg['task_get_extrn_lbcs']) - test_cfg['task_get_extrn_lbcs'] = check_task_get_extrn_lbcs(test_cfg,machine_defaults,config_defaults) - logging.debug(test_cfg['task_get_extrn_lbcs']) + test_cfg['task_get_extrn_lbcs'] = check_task_get_extrn_bcs(test_cfg,machine_defaults, + config_defaults,"lbcs") if 'verification' in test_cfg: - logging.debug(test_cfg['verification']) - test_cfg['verification'] = check_task_verification(test_cfg,machine_defaults,config_defaults) - logging.debug(test_cfg['verification']) + test_cfg['verification'] = check_task_verification(test_cfg,machine_defaults, + config_defaults) - logging.debug(f"Writing updated config.yaml for test {test_name}\nbased on specified command-line arguments:\n") + logging.debug(f"Writing updated config.yaml for test {test_name}\n"\ + "based on specified command-line arguments:\n") logging.debug(cfg_to_yaml_str(test_cfg)) - with open(ushdir + "/config.yaml","w") as f: + with open(os.path.join(ushdir,"config.yaml"),"w", encoding="utf-8") as f: f.writelines(cfg_to_yaml_str(test_cfg)) logging.info(f"Calling workflow generation function for test {test_name}\n") if args.quiet: console_handler = logging.getLogger().handlers[1] console_handler.setLevel(logging.WARNING) - expt_dir = generate_FV3LAM_wflow(ushdir,logfile=f"{ushdir}/log.generate_FV3LAM_wflow",debug=args.debug) + expt_dir = generate_FV3LAM_wflow(ushdir,logfile=f"{ushdir}/log.generate_FV3LAM_wflow", + debug=args.debug) if args.quiet: if args.debug: console_handler.setLevel(logging.DEBUG) @@ -205,12 +216,20 @@ def run_we2e_tests(homedir, args) -> None: if not args.use_cron_to_relaunch: logging.info("calling function that monitors jobs, prints summary") - monitor_file = monitor_jobs(monitor_yaml, debug=args.debug) - - logging.info("All experiments are complete") - logging.info(f"Summary of results available in {monitor_file}") - - + monitor_file = f'WE2E_tests_{datetime.now().strftime("%Y%m%d%H%M%S")}.yaml' + write_monitor_file(monitor_file,monitor_yaml) + try: + monitor_file = monitor_jobs(monitor_yaml, monitor_file=monitor_file, procs=args.procs, + debug=args.debug) + except KeyboardInterrupt: + logging.info("\n\nUser interrupted monitor script; to resume monitoring jobs run:\n") + logging.info(f"./monitor_jobs.py -y={monitor_file} -p={args.procs}\n") + else: + logging.info("All experiments are complete") + logging.info(f"Summary of results available in {monitor_file}") + else: + logging.info("All experiments have been generated; using cron 
to submit workflows") + logging.info("To view running experiments in cron try `crontab -l`") @@ -252,11 +271,12 @@ def check_tests(tests: list) -> list: if os.path.islink(testfile): if os.path.realpath(testfile) in tests_to_run: logging.warning(dedent(f"""WARNING: test file {testfile} is a symbolic link to a - test file ({os.path.realpath(testfile)}) that is also included in the - test list. Only the latter test will be run.""")) + test file ({os.path.realpath(testfile)}) that is also included in + the test list. Only the latter test will be run.""")) tests_to_run.remove(testfile) if len(tests_to_run) != len(set(tests_to_run)): - logging.warning("\nWARNING: Duplicate test names were found in list. Removing duplicates and continuing.\n") + logging.warning("\nWARNING: Duplicate test names were found in list. "\ + "Removing duplicates and continuing.\n") tests_to_run = list(set(tests_to_run)) return tests_to_run @@ -283,121 +303,82 @@ def check_test(test: str) -> str: return config -def check_task_get_extrn_ics(cfg: dict, mach: dict, dflt: dict) -> dict: +def check_task_get_extrn_bcs(cfg: dict, mach: dict, dflt: dict, ics_or_lbcs: str = "") -> dict: """ - Function for checking and updating various settings in task_get_extrn_ics section of test config yaml + Function for checking and updating various settings in task_get_extrn_ics or + task_get_extrn_lbcs section of test config yaml Args: cfg : Dictionary loaded from test config file mach : Dictionary loaded from machine settings file dflt : Dictionary loaded from default config file + ics_or_lbcs: Perform checks for ICs task or LBCs task + Returns: - cfg_ics : Updated dictionary for task_get_extrn_ics section of test config + cfg_bcs : Updated dictionary for task_get_extrn_[ics|lbcs] section of test config """ - #Make our lives easier by shortening some dictionary calls - cfg_ics = cfg['task_get_extrn_ics'] - - # If RUN_TASK_GET_EXTRN_ICS is explicitly set to false, do nothing and return - if 'workflow_switches' in cfg: - if 'RUN_TASK_GET_EXTRN_ICS' in cfg['workflow_switches']: - if cfg['workflow_switches']['RUN_TASK_GET_EXTRN_ICS'] is False: - return cfg_ics + if ics_or_lbcs not in ["lbcs", "ics"]: + raise ValueError("ics_or_lbcs must be set to 'lbcs' or 'ics'") - # If USE_USER_STAGED_EXTRN_FILES not specified or false, do nothing and return - if not cfg_ics.get('USE_USER_STAGED_EXTRN_FILES'): - logging.debug(f'USE_USER_STAGED_EXTRN_FILES not specified or False in task_get_extrn_ics section of config') - return cfg_ics - - # If EXTRN_MDL_SYSBASEDIR_ICS is "set_to_non_default_location_in_testing_script", replace with test value from machine file - if cfg_ics.get('EXTRN_MDL_SYSBASEDIR_ICS') == "set_to_non_default_location_in_testing_script": - if 'TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS' in mach['platform']: - if os.path.isdir(mach['platform']['TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS']): - raise FileNotFoundError(f"Non-default input file location TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS from machine file does not exist or is not a directory") - cfg_ics['EXTRN_MDL_SYSBASEDIR_ICS'] = mach['platform']['TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS'] - else: - raise KeyError(f"Non-default input file location TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS not set in machine file") - return cfg_ics - - # Because USE_USER_STAGED_EXTRN_FILES is true, only look on disk, and ensure the staged data directory exists - cfg['platform']['EXTRN_MDL_DATA_STORES'] = "disk" - if 'TEST_EXTRN_MDL_SOURCE_BASEDIR' not in mach['platform']: - raise KeyError("TEST_EXTRN_MDL_SOURCE_BASEDIR, the directory for 
staged test data,"\ - "has not been specified in the machine file for this platform") - if not os.path.isdir(mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']): - raise FileNotFoundError(dedent(f"""The directory for staged test data specified in this platform's machine file - TEST_EXTRN_MDL_SOURCE_BASEDIR = {mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']} - does not exist.""")) - - # Different input data types have different directory structures, so set the data directory accordingly - if cfg_ics['EXTRN_MDL_NAME_ICS'] == 'FV3GFS': - if 'FV3GFS_FILE_FMT_ICS' not in cfg_ics: - cfg_ics['FV3GFS_FILE_FMT_ICS'] = dflt['task_get_extrn_ics']['FV3GFS_FILE_FMT_ICS'] - cfg_ics['EXTRN_MDL_SOURCE_BASEDIR_ICS'] = f"{mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}/"\ - f"{cfg_ics['EXTRN_MDL_NAME_ICS']}/{cfg_ics['FV3GFS_FILE_FMT_ICS']}/${{yyyymmddhh}}" - else: - cfg_ics['EXTRN_MDL_SOURCE_BASEDIR_ICS'] = f"{mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}/"\ - f"{cfg_ics['EXTRN_MDL_NAME_ICS']}/${{yyyymmddhh}}" - - return cfg_ics - -def check_task_get_extrn_lbcs(cfg: dict, mach: dict, dflt: dict) -> dict: - """ - Function for checking and updating various settings in task_get_extrn_lbcs section of test config yaml - - Args: - cfg : Dictionary loaded from test config file - mach : Dictionary loaded from machine settings file - dflt : Dictionary loaded from default config file - Returns: - cfg_lbcs : Updated dictionary for task_get_extrn_lbcs section of test config - """ + I_OR_L = ics_or_lbcs.upper() #Make our lives easier by shortening some dictionary calls - cfg_lbcs = cfg['task_get_extrn_lbcs'] + cfg_bcs = cfg[f'task_get_extrn_{ics_or_lbcs}'] - # If RUN_TASK_GET_EXTRN_LBCS is explicitly set to false, do nothing and return - if 'workflow_switches' in cfg: - if 'RUN_TASK_GET_EXTRN_LBCS' in cfg['workflow_switches']: - if cfg['workflow_switches']['RUN_TASK_GET_EXTRN_LBCS'] is False: - return cfg_lbcs + # If RUN_TASK_GET_EXTRN_* is explicitly set to false, do nothing and return + if cfg.get('workflow_switches', {}).get(f'RUN_TASK_GET_EXTRN_{I_OR_L}', True) is False: + return cfg_bcs # If USE_USER_STAGED_EXTRN_FILES not specified or false, do nothing and return - if not cfg_lbcs.get('USE_USER_STAGED_EXTRN_FILES'): - logging.debug(f'USE_USER_STAGED_EXTRN_FILES not specified or False in task_get_extrn_lbcs section of config') - return cfg_lbcs - - # If EXTRN_MDL_SYSBASEDIR_LBCS is "set_to_non_default_location_in_testing_script", replace with test value from machine file - if cfg_lbcs.get('EXTRN_MDL_SYSBASEDIR_LBCS') == "set_to_non_default_location_in_testing_script": - if 'TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS' in mach['platform']: - if os.path.isdir(mach['platform']['TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS']): - raise FileNotFoundError(f"Non-default input file location TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS from machine file does not exist or is not a directory") - cfg_lbcs['EXTRN_MDL_SYSBASEDIR_LBCS'] = mach['platform']['TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS'] + if not cfg_bcs.get('USE_USER_STAGED_EXTRN_FILES'): + logging.debug('USE_USER_STAGED_EXTRN_FILES not specified or False in '\ + f'task_get_extrn_{ics_or_lbcs} section of config') + return cfg_bcs + + # If EXTRN_MDL_SYSBASEDIR_* is "set_to_non_default_location_in_testing_script", replace with + # test value from machine file + if cfg_bcs.get(f'EXTRN_MDL_SYSBASEDIR_{I_OR_L}') == \ + "set_to_non_default_location_in_testing_script": + if f'TEST_ALT_EXTRN_MDL_SYSBASEDIR_{I_OR_L}' in mach['platform']: + if 
not os.path.isdir(mach['platform'][f'TEST_ALT_EXTRN_MDL_SYSBASEDIR_{I_OR_L}']):
+                raise FileNotFoundError("Non-default input file location "\
+                                        f"TEST_ALT_EXTRN_MDL_SYSBASEDIR_{I_OR_L} from machine "\
+                                        "file does not exist or is not a directory")
+            cfg_bcs[f'EXTRN_MDL_SYSBASEDIR_{I_OR_L}'] = \
+                mach['platform'][f'TEST_ALT_EXTRN_MDL_SYSBASEDIR_{I_OR_L}']
         else:
-            raise KeyError(f"Non-default input file location TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS not set in machine file")
-        return cfg_lbcs
+            raise KeyError("Non-default input file location "\
+                           f"TEST_ALT_EXTRN_MDL_SYSBASEDIR_{I_OR_L} not set in machine file")
+        return cfg_bcs
 
-    # Because USE_USER_STAGED_EXTRN_FILES is true, only look on disk, and ensure the staged data directory exists
+    # Because USE_USER_STAGED_EXTRN_FILES is true, only look on disk, and ensure the staged data
+    # directory exists
     cfg['platform']['EXTRN_MDL_DATA_STORES'] = "disk"
     if 'TEST_EXTRN_MDL_SOURCE_BASEDIR' not in mach['platform']:
         raise KeyError("TEST_EXTRN_MDL_SOURCE_BASEDIR, the directory for staged test data,"\
                        " has not been specified in the machine file for this platform")
     if not os.path.isdir(mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']):
-        raise FileNotFoundError(dedent(f"""The directory for staged test data specified in this platform's machine file
-            TEST_EXTRN_MDL_SOURCE_BASEDIR = {mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}
-            does not exist."""))
+        raise FileNotFoundError(dedent(
+            f"""The directory for staged test data specified in this platform's machine file
+            TEST_EXTRN_MDL_SOURCE_BASEDIR = {mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}
+            does not exist."""))
 
-    # Different input data types have different directory structures, so set the data directory accordingly
-    if cfg_lbcs['EXTRN_MDL_NAME_LBCS'] == 'FV3GFS':
-        if 'FV3GFS_FILE_FMT_LBCS' not in cfg_lbcs:
-            cfg_lbcs['FV3GFS_FILE_FMT_LBCS'] = dflt['task_get_extrn_lbcs']['FV3GFS_FILE_FMT_LBCS']
-        cfg_lbcs['EXTRN_MDL_SOURCE_BASEDIR_LBCS'] = f"{mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}/"\
-            f"{cfg_lbcs['EXTRN_MDL_NAME_LBCS']}/{cfg_lbcs['FV3GFS_FILE_FMT_LBCS']}/${{yyyymmddhh}}"
+    # Different input data types have different directory structures; set data dir accordingly
+    if cfg_bcs[f'EXTRN_MDL_NAME_{I_OR_L}'] == 'FV3GFS':
+        if f'FV3GFS_FILE_FMT_{I_OR_L}' not in cfg_bcs:
+            cfg_bcs[f'FV3GFS_FILE_FMT_{I_OR_L}'] = \
+                dflt[f'task_get_extrn_{ics_or_lbcs}'][f'FV3GFS_FILE_FMT_{I_OR_L}']
+        cfg_bcs[f'EXTRN_MDL_SOURCE_BASEDIR_{I_OR_L}'] = \
+            os.path.join(f"{mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}",
+                f"{cfg_bcs[f'EXTRN_MDL_NAME_{I_OR_L}']}",f"{cfg_bcs[f'FV3GFS_FILE_FMT_{I_OR_L}']}",
+                f"${{yyyymmddhh}}")
     else:
-        cfg_lbcs['EXTRN_MDL_SOURCE_BASEDIR_LBCS'] = f"{mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}/"\
-            f"{cfg_lbcs['EXTRN_MDL_NAME_LBCS']}/${{yyyymmddhh}}"
+        cfg_bcs[f'EXTRN_MDL_SOURCE_BASEDIR_{I_OR_L}'] = \
+            os.path.join(f"{mach['platform']['TEST_EXTRN_MDL_SOURCE_BASEDIR']}",
+                f"{cfg_bcs[f'EXTRN_MDL_NAME_{I_OR_L}']}/${{yyyymmddhh}}")
 
-    return cfg_lbcs
+    return cfg_bcs
 
 def check_task_verification(cfg: dict, mach: dict, dflt: dict) -> dict:
     """
@@ -423,7 +404,7 @@ def check_task_verification(cfg: dict, mach: dict, dflt: dict) -> dict:
         return cfg_vx
 
     # Attempt to obtain the values of RUN_TASK_RUN_FCST, WRITE_DOPOST, and RUN_TASK_RUN_POST
-    # from the test configuration dictionary. If not available there, get them from the default
+    # from the test configuration dictionary.  If not available there, get them from the default
     # configuration dictionary.
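The comment above names a precedence rule used throughout this refactor (compare the chained dict.get() lookup in check_task_get_extrn_bcs): take a value from the test configuration if present, otherwise from the defaults. Distilled into a standalone sketch (get_flag is illustrative, not part of the patch):

    # Illustrative sketch; hypothetical helper, not part of the SRW App code.
    def get_flag(cfg: dict, dflt: dict, section: str, key: str, fallback=False):
        """Test config wins; defaults are consulted only when the key is absent."""
        for source in (cfg, dflt):
            if key in source.get(section, {}):
                return source[section][key]
        return fallback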
flags = {'RUN_TASK_RUN_FCST': False, 'WRITE_DOPOST': False, 'RUN_TASK_RUN_POST': False} for section in ['workflow_switches', 'task_run_fcst']: @@ -442,10 +423,11 @@ def check_task_verification(cfg: dict, mach: dict, dflt: dict) -> dict: if 'TEST_VX_FCST_INPUT_BASEDIR' in mach['platform']: cfg_vx['VX_FCST_INPUT_BASEDIR'] = mach['platform']['TEST_VX_FCST_INPUT_BASEDIR'] else: - raise KeyError(f"Non-default forecast file location for verification (TEST_VX_FCST_INPUT_BASEDIR) not set in machine file") + cfg_vx['VX_FCST_INPUT_BASEDIR'] = '' return cfg_vx + def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> None: """ Sets up logging, printing high-priority (INFO and higher) messages to screen, and printing all @@ -455,7 +437,7 @@ def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> N formatter = logging.Formatter("%(name)-16s %(levelname)-8s %(message)s") - fh = logging.FileHandler(logfile, mode='w') + fh = logging.FileHandler(logfile, mode='a') fh.setLevel(logging.DEBUG) fh.setFormatter(formatter) logging.getLogger().addHandler(fh) @@ -481,31 +463,57 @@ def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> N logfile='log.run_WE2E_tests' #Parse arguments - parser = argparse.ArgumentParser(epilog="For more information about config arguments (denoted in CAPS), see ush/config_defaults.yaml\n") - optional = parser._action_groups.pop() # Create a group for optional arguments so they can be listed after required args + parser = argparse.ArgumentParser(epilog="For more information about config arguments (denoted "\ + "in CAPS), see ush/config_defaults.yaml\n") + # Create a group for optional arguments so they can be listed after required args + optional = parser._action_groups.pop() required = parser.add_argument_group('required arguments') - required.add_argument('-m', '--machine', type=str, help='Machine name; see ush/machine/ for valid values', required=True) - required.add_argument('-a', '--account', type=str, help='Account name for running submitted jobs', required=True) - required.add_argument('-t', '--tests', type=str, nargs="*", help="""Can be one of three options (in order of priority): + required.add_argument('-m', '--machine', type=str, + help='Machine name; see ush/machine/ for valid values', required=True) + required.add_argument('-a', '--account', type=str, + help='Account name for running submitted jobs', required=True) + required.add_argument('-t', '--tests', type=str, nargs="*", + help="""Can be one of three options (in order of priority): 1. A test name or list of test names. 2. A test suite name ("fundamental", "comprehensive", or "all") 3. The name of a file (full or relative path) containing a list of test names. 
""", required=True) - parser.add_argument('-c', '--compiler', type=str, help='Compiler used for building the app', default='intel') - parser.add_argument('-d', '--debug', action='store_true', help='Script will be run in debug mode with more verbose output') - parser.add_argument('-q', '--quiet', action='store_true', help='Suppress console output from workflow generation; this will help keep the screen uncluttered') - + parser.add_argument('-c', '--compiler', type=str, + help='Compiler used for building the app', default='intel') + parser.add_argument('-d', '--debug', action='store_true', + help='Script will be run in debug mode with more verbose output') + parser.add_argument('-q', '--quiet', action='store_true', + help='Suppress console output from workflow generation; this will help '\ + 'keep the screen uncluttered') + parser.add_argument('-p', '--procs', type=int, + help='Run resource-heavy tasks (such as calls to rocotorun) in parallel, '\ + 'with provided number of parallel tasks', default=1) parser.add_argument('--modulefile', type=str, help='Modulefile used for building the app') - parser.add_argument('--run_envir', type=str, help='Overrides RUN_ENVIR variable to a new value ( "nco" or "community" ) for all experiments', default='') - parser.add_argument('--expt_basedir', type=str, help='Explicitly set EXPT_BASEDIR for all experiments') - parser.add_argument('--exec_subdir', type=str, help='Explicitly set EXEC_SUBDIR for all experiments') - parser.add_argument('--use_cron_to_relaunch', action='store_true', help='Explicitly set USE_CRON_TO_RELAUNCH for all experiments; this option disables the "monitor" script functionality') - parser.add_argument('--cron_relaunch_intvl_mnts', type=str, help='Overrides CRON_RELAUNCH_INTVL_MNTS for all experiments') - parser.add_argument('--debug_tests', action='store_true', help='Explicitly set DEBUG=TRUE for all experiments') - parser.add_argument('--verbose_tests', action='store_true', help='Explicitly set VERBOSE=TRUE for all experiments') + parser.add_argument('--run_envir', type=str, + help='Overrides RUN_ENVIR variable to a new value ("nco" or "community") '\ + 'for all experiments', default='') + parser.add_argument('--expt_basedir', type=str, + help='Explicitly set EXPT_BASEDIR for all experiments') + parser.add_argument('--exec_subdir', type=str, + help='Explicitly set EXEC_SUBDIR for all experiments') + parser.add_argument('--use_cron_to_relaunch', action='store_true', + help='Explicitly set USE_CRON_TO_RELAUNCH for all experiments; this '\ + 'option disables the "monitor" script functionality') + parser.add_argument('--cron_relaunch_intvl_mnts', type=int, + help='Overrides CRON_RELAUNCH_INTVL_MNTS for all experiments') + parser.add_argument('--opsroot', type=str, + help='If test is for NCO mode, sets OPSROOT (see config_defaults.yaml for '\ + 'more details on this variable)') + parser.add_argument('--print_test_info', action='store_true', + help='Create a "WE2E_test_info.txt" file summarizing each test prior to'\ + 'starting experiment') + parser.add_argument('--debug_tests', action='store_true', + help='Explicitly set DEBUG=TRUE for all experiments') + parser.add_argument('--verbose_tests', action='store_true', + help='Explicitly set VERBOSE=TRUE for all experiments') parser._action_groups.append(optional) @@ -514,7 +522,13 @@ def setup_logging(logfile: str = "log.run_WE2E_tests", debug: bool = False) -> N #Set defaults that need other argument values if args.modulefile is None: args.modulefile = 
f'build_{args.machine.lower()}_{args.compiler}' + if args.procs < 1: + raise ValueError('You can not have less than one parallel process; select a valid value '\ + 'for --procs') + # Print test details (if requested) + if args.print_test_info: + print_test_info() #Call main function try: diff --git a/tests/WE2E/run_WE2E_tests.sh b/tests/WE2E/run_WE2E_tests.sh deleted file mode 100755 index d2319b6d87..0000000000 --- a/tests/WE2E/run_WE2E_tests.sh +++ /dev/null @@ -1,1379 +0,0 @@ -#!/bin/bash - -# -#----------------------------------------------------------------------- -# -# This script runs the specified WE2E tests. Type -# -# run_WE2E_tests.sh --help -# -# for a full description of how to use this script. -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script or function is -# located (scrfunc_fp), the name of that file (scrfunc_fn), and the -# directory in which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# -scrfunc_fp=$( readlink -f "${BASH_SOURCE[0]}" ) -scrfunc_fn=$( basename "${scrfunc_fp}" ) -scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Set the full path to the top-level directory of the UFS SRW App -# repository. We denote this path by HOMEdir. The current script -# should be located in the "tests/WE2E" subdirectory under this directory. -# Thus, HOMEdir is the directory two levels above the directory in which -# the current script is located. -# -#----------------------------------------------------------------------- -# -HOMEdir=${scrfunc_dir%/*/*} -# -#----------------------------------------------------------------------- -# -# Set other directories that depend on HOMEdir. -# -#----------------------------------------------------------------------- -# -export USHdir="$HOMEdir/ush" -TESTSdir="$HOMEdir/tests" -WE2Edir="$TESTSdir/WE2E" -# -#----------------------------------------------------------------------- -# -# Source bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -# -#----------------------------------------------------------------------- -# -# Source other needed files. -# -#----------------------------------------------------------------------- -# -. ${WE2Edir}/get_WE2Etest_names_subdirs_descs.sh -# -#----------------------------------------------------------------------- -# -# Run python checks -# -#----------------------------------------------------------------------- -# -python3 $USHdir/check_python_version.py -if [[ $? -ne 0 ]]; then - exit 1 -fi - -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script or function. -# -#----------------------------------------------------------------------- -# -{ save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Set the usage message. 
-# -#----------------------------------------------------------------------- -# -usage_str="\ -Usage: - - ${scrfunc_fn} \\ - tests_file=\"...\" \\ - machine=\"...\" \\ - account=\"...\" \\ - [expt_basedir=\"...\"] \\ - [exec_subdir=\"...\"] \\ - [use_cron_to_relaunch=\"...\"] \\ - [cron_relaunch_intvl_mnts=\"...\"] \\ - [debug=\"...\"] \\ - [verbose=\"...\"] \\ - [generate_csv_file=\"...\"] \\ - [machine_file=\"...\"] \\ - [opsroot=\"...\"] \\ - [run_envir=\"...\"] \\ - [compiler=\"...\"] \\ - [build_mod_fn=\"...\"] - -The arguments in brackets are optional. The arguments are defined as -follows: - -Exactly one of the following flags for defining which tests to run is -required - - tests_file: - Name of file or relative or absolute path to file containing the list - of WE2E tests to run. This file must contain one test name per line, - with no repeated names. - - test_type: - Name of a supported set of tests. Options are fundamental, - comprehensive, or all. - - test_name: - The name of a single test to run - -machine: -Argument used to explicitly set the experiment variable MACHINE in the -experiment configuration files of all the WE2E tests the user wants to -run. (A description of MACHINE can be found in the default experiment -configuration file.) This is a required argument. - -account: -Argument used to explicitly set the experiment variable ACCOUNT in the -experiment configuration files of all the WE2E tests the user wants to -run. (A description of ACCOUNT can be found in the default experiment -configuration file.) This is a required argument. - -expt_basedir: -Optional argument used to explicitly set the experiment variable -EXPT_BASEDIR in the experiment configuration files of all the WE2E tests -the user wants to run. (A description of EXPT_BASEDIR can be found in -the default experiment configuration file.) If expt_basedir is specified -in the call to this script, its value is used to set EXPT_BASEDIR in the -configuration files. If it is not specified, EXPT_BASEDIR is not set in -the configuration files, in which case the workflow generation script -sets it to a default value. Note that if expt_basedir is set to a -relative path (e.g. expt_basedir=\"testset1\" in the call to this script), -then the experiment generation script will set EXPT_BASEDIR for the -experiment to a default absolute path followed by \${expt_basedir}. -This feature can be used to group the WE2E tests into subdirectories for -convenience, e.g. a set of tests under subdirectory testset1, another -set of tests under testset2, etc. - -exec_subdir: -Optional argument used to explicitly set the experiment variable -EXEC_SUBDIR in the experiment configuration files of all the WE2E tests -the user wants to run. See the default experiment configuration file -\"config_defaults.sh\" for a full description of EXEC_SUBDIR. - -use_cron_to_relaunch: -Optional argument used to explicitly set the experiment variable -USE_CRON_TO_RELAUNCH in the experiment configuration files of all the -WE2E tests the user wants to run. (A description of USE_CRON_TO_RELAUNCH -can be found in the default experiment configuration file.) If -use_cron_to_relaunch is specified in the call to this script, its value -is used to set USE_CRON_TO_RELAUNCH in the configuration files. If it -is not specified, USE_CRON_TO_RELAUNCH is set to \"TRUE\" in the -configuration files, in which case cron jobs are used to (re)launch the -workflows for all tests (one cron job per test). 
Thus, use_cron_to_relaunch -needs to be specified only if the user wants to turn off use of cron jobs -for all tests (by specifying use_cron_to_relaunch=\"FALSE\" on the command -line). Note that it is not possible to specify a different value for -USE_CRON_TO_RELAUNCH for each test via this argument; either all tests -use cron jobs or none do. - -cron_relaunch_intvl_mnts: -Optional argument used to explicitly set the experiment variable -CRON_RELAUNCH_INTVL_MNTS in the experiment configuration files of -all the WE2E tests the user wants to run. (A description of -CRON_RELAUNCH_INTVL_MNTS can be found in the default experiment -configuration file.) If cron_relaunch_intvl_mnts is specified in the -call to this script, its value is used to set CRON_RELAUNCH_INTVL_MNTS -in the configuration files. If it is not specified, CRON_RELAUNCH_INTVL_MNTS -is set to \"02\" (i.e. two minutes) in the configuration files. Note -that it is not possible to specify a different value for -CRON_RELAUNCH_INTVL_MNTS for each test via this argument; all tests will -use the same value for USE_CRON_TO_RELAUNCH (either the value specified -in the call to this script or the default value of \"02\"). Note also -that the value of this argument matters only if the argument -use_cron_to_relaunch is not explicitly set to \"FALSE\" in the call to -this script. - -debug: -If true, run test case in debugging mode. - -verbose: -Optional argument used to explicitly set the experiment variable VERBOSE -in the experiment configuration files of all the WE2E tests the user -wants to run. (A description of VERBOSE can be found in the default -experiment configuration file.) If verbose is specified in the call to -this script, its value is used to set VERBOSE in the configuration files. -If it is not specified, VERBOSE is set to \"TRUE\" in the configuration -files. Note that it is not possible to specify a different value for -VERBOSE for each test via this argument; either all tests will have -VERBOSE set to \"TRUE\" or all will have it set to \"FALSE\". - -generate_csv_file: -Optional argument that specifies whether or not to generate a CSV file -containing summary information about all the tests available in the WE2E -testing system. Default value is \"TRUE\". - -machine_file: -Optional argument specifying the full path to a machine configuration -file. If not set, a supported platform machine file may be used. - -opsroot: -Operations root directory in NCO mode - -run_envir: -Overrides RUN_ENVIR variable to a new value ( nco or community ) - -compiler: -Optional argument used to explicitly set the experiment variable COMPILER -in the experiment configuration files of all the WE2E tests the user -wants to run. (A description of COMPILER can be found in the default -experiment configuration file.) If compiler is specified in the call to -this script, its value is used to set COMPILER in the configuration files. -If it is not specified, COMPILER is set to \"intel\" in the configuration -files. Note that it is not possible to specify a different value for -COMPILER for each test via this argument; all tests will use the same -value for COMPILER (either the value specified in the call to this script -or the default value of \"intel\"). - -build_mod_fn: -Optional argument used to explicitly set the experiment variable -BUILD_MOD_FN in the experiment configuration files of all the WE2E tests -the user wants to run (e.g. \"build_cheyenne_gnu\"). 
If the string -\"gnu\" appears in this file name, the \"compiler\" option to this -function must also be specified with the value \"gnu\". - - -Usage Examples: --------------- -Here, we give several common usage examples. In the following, assume -my_tests.txt is a text file in the same directory as this script containing -a list of test names that we want to run, e.g. - -> more my_tests.txt -new_ESGgrid -specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE - -Then: - -1) To run the tests listed in my_tests.txt on Hera and charge the core- - hours used to the \"rtrr\" account, use: - - > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" - - This will create the experiment subdirectories for the two tests in - the directory - - \${HOMEdir}/../expt_dirs - - where HOMEdir is the directory in which the ufs-srweather-app - repository is cloned. Thus, the following two experiment directories - will be created: - - \${HOMEdir}/../expt_dirs/new_ESGgrid - \${HOMEdir}/../expt_dirs/specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE - - In addition, by default, cron jobs will be created in the user's cron - table to relaunch the workflows of these experiments every 2 minutes. - -2) To change the frequency with which the cron relaunch jobs are submitted - from the default of 2 minutes to 1 minute, use: - - > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" cron_relaunch_intvl_mnts=\"01\" - -3) To disable use of cron (which means the worfkow for each test will - have to be relaunched manually from within each experiment directory), - use: - - > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" use_cron_to_relaunch=\"FALSE\" - -4) To place the experiment subdirectories in a subdirectory named \"test_set_01\" - under - - \${HOMEdir}/../expt_dirs - - (instead of immediately under the latter), use: - - > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" expt_basedir=\"test_set_01\" - - In this case, the full paths to the experiment directories will be: - - \${HOMEdir}/../expt_dirs/test_set_01/new_ESGgrid - \${HOMEdir}/../expt_dirs/test_set_01/specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE - -5) To use a list of tests that is located in - - /path/to/custom/my_tests.txt - - instead of in the same directory as this script, and to have the - experiment directories be placed in an arbitrary location, say - - /path/to/custom/expt_dirs - - use: - - > run_WE2E_tests.sh tests_file=\"/path/to/custom/my_tests.txt\" machine=\"hera\" account=\"rtrr\" expt_basedir=\"/path/to/custom/expt_dirs\" -" -# -#----------------------------------------------------------------------- -# -# Check to see if usage help for this script is being requested. If so, -# print it out and exit with a 0 exit code (success). -# -#----------------------------------------------------------------------- -# -help_flag="--help" -if [ "$#" -eq 1 ] && [ "$1" = "${help_flag}" ]; then - print_info_msg "${usage_str}" - exit 0 -fi -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script or function. -# Then process the arguments provided to it on the command line (which -# should consist of a set of name-value pairs of the form arg1="value1", -# arg2="value2", etc). 
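The name=value argument convention described above is exactly what the Python
replacement drops in favor of standard argparse flags. As a rough, hypothetical
sketch (not the actual run_WE2E_tests.py, whose option set is larger), the
equivalent interface looks like this, using the flag names that
setup_WE2E_tests.sh passes later in this patch:

    #!/usr/bin/env python3
    # Sketch only: mirrors the old name=value options as argparse flags
    import argparse

    parser = argparse.ArgumentParser(description="Run WE2E tests")
    parser.add_argument("--machine", required=True, help="Platform to run tests on")
    parser.add_argument("--account", required=True, help="Account to charge core hours to")
    parser.add_argument("--compiler", default="intel", help="Compiler used to build binaries")
    parser.add_argument("--tests", default="fundamental",
                        help="A suite (all|comprehensive|fundamental), a file of test names, "
                             "or a single test name")
    parser.add_argument("--debug", action="store_true", help="Run tests in debug mode")
    parser.add_argument("--verbose", action="store_true", help="Verbose experiment generation")
    parser.add_argument("--cron_relaunch_intvl_mnts", type=int, default=2,
                        help="Minutes between cron relaunches of each workflow")
    parser.add_argument("--exec_subdir", help="Subdirectory containing built executables")
    args = parser.parse_args()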
-# -#----------------------------------------------------------------------- -# -valid_args=( \ - "tests_file" \ - "test_type" \ - "test_name" \ - "machine" \ - "account" \ - "expt_basedir" \ - "exec_subdir" \ - "use_cron_to_relaunch" \ - "cron_relaunch_intvl_mnts" \ - "debug" \ - "verbose" \ - "generate_csv_file" \ - "machine_file" \ - "opsroot" \ - "run_envir" \ - "compiler" \ - "build_mod_fn" \ - ) -process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# "TRUE". -# -#----------------------------------------------------------------------- -# -print_input_args "valid_args" -# -#----------------------------------------------------------------------- -# -# Verify that the required arguments to this script have been specified. -# If not, print out an error message and exit. -# -#----------------------------------------------------------------------- -# -help_msg="\ -Use - ${scrfunc_fn} ${help_flag} -to get help on how to use this script." - -if [ -z "${tests_file}" ] && [ -z "${test_name}" ] && [ -z "${test_type}" ] ; then - print_err_msg_exit "\ -At least on of the following arguments must be specified to run this -script: - tests_file - test_name - test_type -${help_msg}" -fi - -if [ -z "${machine}" ]; then - print_err_msg_exit "\ -The argument \"machine\" specifying the machine or platform on which to -run the WE2E tests was not specified in the call to this script. \ -${help_msg}" -fi -machine=${machine,,} - - # Cheyenne-specific test limitation - -if [ "${machine}" = "cheyenne" ]; then - use_cron_to_relaunch=FALSE - echo " -Due to system limitations, the 'use_cron_to_relaunch' command can not be used on -the '${machine}' machine. Setting this variable to false. - -" -fi - -if [ -z "${account}" ]; then - print_err_msg_exit "\ -The argument \"account\" specifying the account under which to submit -jobs to the queue when running the WE2E tests was not specified in the -call to this script. \ -${help_msg}" -fi -# -#----------------------------------------------------------------------- -# -# Set the list of tests to run. -# -#----------------------------------------------------------------------- -# -if [ -n "${test_name}" ] ; then - - # User specified a single test - user_spec_tests=( "${test_name}" ) - -elif [ "${test_type}" = "all" ] ; then - - # User would like to run all the tests available - user_spec_tests=() - for fp in $(find ${scrfunc_dir}/test_configs -name "config.*" -type f ) ; do - user_spec_tests+=("$(basename $fp | cut -f 2 -d .)") - done - -elif [ -n "${tests_file}" ] || [ -n "${test_type}" ] ; then - - # User wants to run a set of tests from a file, either their own or - # one managed in the repo - - if [ -n "${test_type}" ] ; then - # Check for a pre-defined set. It could be machine dependent or has the mode - # (community or nco), or default - user_spec_tests_fp=${scrfunc_dir}/machine_suites/${test_type}.${machine}.${compiler}.nco - if [ ! -f ${user_spec_tests_fp} ]; then - user_spec_tests_fp=${scrfunc_dir}/machine_suites/${test_type}.${machine}.${compiler}.com - if [ ! -f ${user_spec_tests_fp} ]; then - user_spec_tests_fp=${scrfunc_dir}/machine_suites/${test_type}.${machine}.${compiler} - if [ ! -f ${user_spec_tests_fp} ]; then - user_spec_tests_fp=${scrfunc_dir}/machine_suites/${test_type}.${machine} - if [ ! 
-f ${user_spec_tests_fp} ]; then - user_spec_tests_fp=${scrfunc_dir}/machine_suites/${test_type} - fi - fi - else - run_envir=${run_envir:-"community"} - fi - else - run_envir=${run_envir:-"nco"} - fi - elif [ -n "${tests_file}" ] ; then - user_spec_tests_fp=$( readlink -f "${tests_file}" ) - fi - - if [ ! -f "${user_spec_tests_fp}" ]; then - print_err_msg_exit "\ - The file containing the user-specified list of WE2E tests to run - (tests_file) that is passed in as an argument to this script does not - exit: - tests_file = \"${tests_file}\" - The full path to this script is: - user_spec_tests_fp = \"${user_spec_tests_fp}\" - Please ensure that this file exists and rerun." - fi - # - #----------------------------------------------------------------------- - # - # Read in each line of the file specified by user_spec_tests_fp and add - # each non-empty line to the array user_spec_tests. Note that the read - # command will remove any leading and trailing whitespace from each line - # in user_spec_tests_fp [because it treats whatever character(s) the bash - # variable IFS (Internal Field Separator) is set to as word separators - # on each line, and IFS is by default set to a space, a tab, and a - # newline]. - # - #----------------------------------------------------------------------- - # - user_spec_tests=() - while read -r line; do - if [ ! -z "$line" ]; then - user_spec_tests+=("$line") - fi - done < "${user_spec_tests_fp}" - -fi -# -#----------------------------------------------------------------------- -# -# Call a function to obtain the names of all available WE2E tests (i.e. -# not just the ones the user wants to run but all that are part of the -# WE2E testing system), the test IDs, and the category subdirectory in -# which each corresponding test configuration file is located. -# -# The array of test names (avail_WE2E_test_names) that the function -# called below returns contains both primary and alternate test names. -# A primary test name is a test name obtained from the name of a WE2E -# test configuration file that is an ordinary file, i.e. not a symlink, -# whereas an alternate name is one that is derived from the name of a -# symlink whose target is an ordinary test configuration file (but not -# another symlink). To be able to determine the set of test names that -# correspond to the same primary test, the function called also returns -# an array of test IDs (avail_WE2E_test_IDs) such that the IDs for a -# primary test name and all the alternate names that map to it (if any) -# are the same. These IDs will be used later below to ensure that the -# user does not list in the set of test names to run a given test more -# than once, e.g. by accidentally including in the list its primary name -# as well as one of its alternate names. -# -# The category subdirectories in the array avail_WE2E_test_subdirs -# returned by the function called below are relative to the base -# directory under which the WE2E test configuration files are located. -# This base directory is set by the function call below and is returned -# in the output variable avail_WE2E_test_configs_basedir. The i-th -# element of avail_WE2E_test_subdirs specifies the subdirectory under -# this base directory that contains the ordinary test configuration file -# (for a primary test name) or the symlink (for an alternate test name) -# corresponding to the i-th element (which may be a primary or alternate -# test name) in avail_WE2E_test_names. 
We refer to these subdirectories -# as "category" subdirectories because they are used for clarity to group -# the WE2E tests into types or categories. -# -# Finally, note that the returned arrays -# -# avail_WE2E_test_names -# avail_WE2E_test_ids -# avail_WE2E_test_subdirs -# -# are sorted in order of increasing test ID and such that for a given -# set of test names that share the same ID, the primary test name is -# listed first followed by zero or more alternate names. As an example, -# assume that there are three category subdirectories under the base -# directory specified by avail_WE2E_test_configs_basedir: dir1, dir2, -# and dir3. Also, assume that dir1 contains a test configuration file -# named config.primary_name.sh that is an ordinary file, and dir2 and dir3 -# contain the following symlinks that point config.primary_name.sh: -# -# ${avail_WE2E_test_configs_basedir}/dir2/config.alt_name_1.sh -# --> ${avail_WE2E_test_configs_basedir}/dir1/config.primary_name.sh -# -# ${avail_WE2E_test_configs_basedir}/dir3/config.alt_name_2.sh -# --> ${avail_WE2E_test_configs_basedir}/dir1/config.primary_name.sh -# -# Finally, assume that the ID of the test primary_name is 21 and that -# this ID is at indices 7, 8, and 9 in avail_WE2E_test_ids. Then indices -# 7, 8, and 9 of the three arrays returned by the function call below -# may be as follows: -# -# avail_WE2E_test_names[7]="primary_name" -# avail_WE2E_test_names[8]="alt_name_1" -# avail_WE2E_test_names[9]="alt_name_2" -# -# avail_WE2E_test_ids[7]="21" -# avail_WE2E_test_ids[8]="21" -# avail_WE2E_test_ids[9]="21" -# -# avail_WE2E_test_subdirs[7]="dir1" -# avail_WE2E_test_subdirs[8]="dir2" -# avail_WE2E_test_subdirs[9]="dir3" -# -#----------------------------------------------------------------------- -# -print_info_msg " -Getting information about all available WE2E tests..." - -get_WE2Etest_names_subdirs_descs \ - WE2Edir="${WE2Edir}" \ - generate_csv_file="${generate_csv_file}" \ - outvarname_test_configs_basedir="avail_WE2E_test_configs_basedir" \ - outvarname_test_names="avail_WE2E_test_names" \ - outvarname_test_subdirs="avail_WE2E_test_subdirs" \ - outvarname_test_ids="avail_WE2E_test_ids" -# -# Get the total number of available WE2E test names (including alternate -# names). -# -num_avail_WE2E_tests="${#avail_WE2E_test_names[@]}" -# -#----------------------------------------------------------------------- -# -# Loop through the elements of the array user_spec_tests and perform -# sanity checks. For each such element (i.e. for each WE2E test to run -# specified by the user), make sure that: -# -# 1) The name of the test exists in the complete list of available WE2E -# tests in avail_WE2E_test_names. -# 2) The test does not have an ID that is identical to a previously -# considered test in the user-specified list of tests to run (because -# if so, it would be identical to that previously considered test, -# and it would be a waste of computational resources to run). -# -# If these requirements are met, add the test name to the list of tests -# to run in the array names_tests_to_run, and add the test's category -# subdirectory to subdirs_tests_to_run. -# -#----------------------------------------------------------------------- -# -print_info_msg " -Performing sanity checks on user-specified list of WE2E tests to run..." 
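The primary/alternate-name bookkeeping described in the comments above reduces
to resolving symlinks among the test configuration files. A minimal Python
sketch of that resolution, using the same islink/readlink approach the new
print_test_info.py takes (the helper name here is ours, not part of the PR):

    import os

    def primary_test_name(config_path: str) -> str:
        """Resolve a WE2E config file (config.<name>.yaml) to its primary test name.

        If config_path is a symlink, its target is the primary test's ordinary
        config file; otherwise the file itself defines the primary name.
        """
        if os.path.islink(config_path):
            config_path = os.readlink(config_path)
        # Strip the leading "config." and the trailing ".yaml"
        return os.path.basename(config_path)[7:-5]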
- -names_tests_to_run=() -ids_tests_to_run=() -subdirs_tests_to_run=() -# -# Initialize the array that will contain the remaining available WE2E -# test names (including alternate names, if any) after finding a match -# for the i-th user-specified test name to run in user_spec_tests. -# -remaining_avail_WE2E_test_names=( "${avail_WE2E_test_names[@]}" ) - -num_user_spec_tests="${#user_spec_tests[@]}" -for (( i=0; i<=$((num_user_spec_tests-1)); i++ )); do - - user_spec_test="${user_spec_tests[$i]}" - - print_info_msg "\ - Checking user-specified WE2E test: \"${user_spec_test}\"" -# -# For the current user-specified WE2E test (user_spec_test), loop through -# the list of all remaining available WE2E test names (i.e. the ones that -# haven't yet been matched to any of the user-specified test names to -# run) and make sure that: -# -# 1) The name of the test exists (either as a primary test name or an -# alternate test name) in the list of all available WE2E test names. -# 2) The test is not repeated in the user-specified list of tests to run, -# either under the same name or an alternate name (i.e. make sure that -# it does not have the same test ID as a previously considered test). -# -# Note that in the loop below, the index j gets set to only those elements -# of remaining_avail_WE2E_test_names that are defined [the syntax -# "${!some_array[@]}" expands to the indices of some_array that have -# defined elements]. We do this for efficiency; we unset elements of -# remaining_avail_WE2E_test_names that have already been matched with -# one of the user-specified test names to run because we know that any -# remaining user-specified test names will not match those elements. -# - match_found="FALSE" - for j in "${!remaining_avail_WE2E_test_names[@]}"; do - - test_name="${avail_WE2E_test_names[$j]}" - test_id="${avail_WE2E_test_ids[$j]}" -# -# Check whether the name of the current user-specified test (user_spec_test) -# matches any of the names in the full list of WE2E tests. If so: -# -# 1) Set match_found to "TRUE". -# 2) Make sure that the test to run doesn't have a test ID that is -# identical to a previously considered test in the user-specified -# list of tests to run (which would mean the two tests are identical). -# If so, print out an error message and exit. -# - if [ "${test_name}" = "${user_spec_test}" ]; then - - match_found="TRUE" - - is_element_of "ids_tests_to_run" "${test_id}" && { - - user_spec_tests_str=$(printf " \"%s\"\n" "${user_spec_tests[@]}") - user_spec_tests_str=$(printf "(\n%s\n )" "${user_spec_tests_str}") - - all_names_for_test=() - for (( k=0; k<=$((num_avail_WE2E_tests-1)); k++ )); do - if [ "${avail_WE2E_test_ids[$k]}" = "${test_id}" ]; then - all_names_for_test+=("${avail_WE2E_test_names[$k]}") - fi - done - all_names_for_test_str=$(printf " \"%s\"\n" "${all_names_for_test[@]}") - - print_err_msg_exit "\ -The current user-specified test to run (user_spec_test) is already included -in the list of tests to run (user_spec_tests), either under the same name -or an alternate name: - user_spec_test = \"${user_spec_test}\" - user_spec_tests = ${user_spec_tests_str} -This test has the following primary and possible alternate names: -${all_names_for_test_str} -In order to avoid repeating the same WE2E test (and thus waste computational -resources), only one of these test names can be specified in the list of -tests to run. Please modify this list in the file - user_spec_tests_fp = \"${user_spec_tests_fp}\" -accordingly and rerun." 
- - } -# -# Append the name of the current user-specified test, its ID, and its -# category subdirectory to the arrays that contain the sanity-checked -# versions of of these quantities. -# - names_tests_to_run+=("${user_spec_test}") - ids_tests_to_run+=("${test_id}") - subdirs_tests_to_run+=("${avail_WE2E_test_subdirs[$j]}") -# -# Remove the j-th element of remaining_avail_WE2E_test_names so that for -# the next user-specified test to run, we do not need to check whether -# the j-th test is a match. Then break out of the loop over all remaining -# available WE2E tests. -# - unset remaining_avail_WE2E_test_names[$j] - break - - fi - - done -# -# If match_found is still "FALSE" after exiting the loop above, then a -# match for the current user-specifed test to run was not found in the -# list of all WE2E tests -- neither as a primary test name nor as an -# alternate name. In this case, print out an error message and exit. -# - if [ "${match_found}" = "FALSE" ]; then - avail_WE2E_test_names_str=$( printf " \"%s\"\n" "${avail_WE2E_test_names[@]}" ) - print_err_msg_exit "\ -The name of the current user-specified test to run (user_spec_test) does -not match any of the names (either primary or alternate) of the available -WE2E tests: - user_spec_test = \"${user_spec_test}\" -Valid values for user_spec_test consist of the names (primary or alternate) -of the available WE2E tests, which are: -${avail_WE2E_test_names_str} -Each name in the user-specified list of tests to run: - 1) Must match one of the (primary or alternate) test names of the - availabe WE2E tests. - 2) Must not be the primary or alternate name of a test that has its - primary or one of its alternate names already included in the user- - specified list of test to run, i.e. tests must not be repeated (in - order not to waste computational resources). -Please modify the user-specified list of tests to run such that it adheres -to the rules above and rerun. This list is in the file specified by the -input variable tests_file: - tests_file = \"${tests_file}\" -The full path to this file is: - user_spec_tests_fp = \"${user_spec_tests_fp}\"" - fi - -done -# -#----------------------------------------------------------------------- -# -# Get the number of WE2E tests to run and print out an informational -# message. -# -#----------------------------------------------------------------------- -# -num_tests_to_run="${#names_tests_to_run[@]}" -tests_to_run_str=$( printf " \'%s\'\n" "${names_tests_to_run[@]}" ) -print_info_msg " -After processing the user-specified list of WE2E tests to run, the number -of tests to run (num_tests_to_run) is - num_tests_to_run = ${num_tests_to_run} -and the list of WE2E tests to run (one test per line) is -${tests_to_run_str}" -# -#----------------------------------------------------------------------- -# -# Loop through the WE2E tests to run. For each test, use the corresponding -# test configuration file to generate a temporary experiment file and -# launch the experiment generation script using that file. -# -#----------------------------------------------------------------------- -# -for (( i=0; i<=$((num_tests_to_run-1)); i++ )); do - - test_name="${names_tests_to_run[$i]}" - test_subdir="${subdirs_tests_to_run[$i]}" -# -# Generate the full path to the current WE2E test's configuration file. -# Then ensure that this file exists. -# - test_config_fp="${avail_WE2E_test_configs_basedir}/${test_subdir}/config.${test_name}.yaml" - - if [ ! 
-f "${test_config_fp}" ]; then - print_err_msg_exit "\ -The experiment configuration file (test_config_fp) for the current WE2E -test (test_name) does not exist: - test_name = \"${test_name}\" - test_config_fp = \"${test_config_fp}\" -Please correct and rerun." - fi -# -#----------------------------------------------------------------------- -# -# Source the default experiment configuration file to set values of -# various experiment variables to their defaults. Then source the -# current WE2E test's configuration file to overwrite certain variables' -# default values with test-specific ones. -# -#----------------------------------------------------------------------- -# - - # Save the environment variable since a default will override when - # sourced. - save_USHdir=${USHdir} - source_config ${USHdir}/config_defaults.yaml - USHdir=${save_USHdir} - MACHINE_FILE=${machine_file:-"${USHdir}/machine/${machine}.yaml"} - source_config ${MACHINE_FILE} - source_config ${test_config_fp} -# -#----------------------------------------------------------------------- -# -# We will now construct a multiline variable consisting of the contents -# that we want the experiment configuration file for this WE2E test to -# have. Once this variable is constructed, we will write its contents -# to the generic configuration file that the experiment generation script -# reads in (specified by the variable EXPT_CONFIG_FN in the default -# configuration file config_defaults.yaml sourced above) and then run that -# script to generate an experiment for the current WE2E test. -# -# We name the multiline variable that will contain the contents of the -# experiment configuration file "expt_config_str" (short for "experiment -# configuration string"). Here, we initialize this to a null string, -# and we append to it later below. -# -#----------------------------------------------------------------------- -# - expt_config_str="" -# -#----------------------------------------------------------------------- -# -# Set (and then write to expt_config_str) various experiment variables -# that depend on the input arguments to this script (as opposed to -# variable settings in the test configuration file specified by -# test_config_fp). Note that any values of these parameters specified -# in the default experiment configuration file (config_defaults.yaml) -# or in the test configuraiton file (test_config_fp) that were sourced -# above will be overwritten by the settings below. -# -# Note also that if EXPT_BASEDIR ends up getting set to a null string, -# the experiment generation script that gets called further below will -# set it to a default path; if it gets set to a relative path, then the -# experiment generation script will set it to a path consisting of a -# default path with the relative path appended to it; and if it gets set -# to an absolute path, then the workflow will leave it set to that path. 
-# -#----------------------------------------------------------------------- -# - MACHINE="${machine^^}" - ACCOUNT="${account}" - COMPILER=${compiler:-"intel"} - BUILD_MOD_FN=${build_mod_fn:-"build_${machine}_${COMPILER}"} - EXPT_BASEDIR="${expt_basedir}" - EXPT_SUBDIR="${test_name}" - EXEC_SUBDIR="${exec_subdir}" - USE_CRON_TO_RELAUNCH=${use_cron_to_relaunch:-"TRUE"} - CRON_RELAUNCH_INTVL_MNTS=${cron_relaunch_intvl_mnts:-"02"} - DEBUG=${debug:-"FALSE"} - VERBOSE=${verbose:-"TRUE"} - - expt_config_str=${expt_config_str}"\ -# -# The machine on which to run, the account to which to charge computational -# resources, the base directory in which to create the experiment directory -# (if different from the default location), and the name of the experiment -# subdirectory. -# -MACHINE=\"${MACHINE}\" -ACCOUNT=\"${ACCOUNT}\" - -COMPILER=\"${COMPILER}\" -BUILD_MOD_FN=\"${BUILD_MOD_FN}\"" - - if [ -n "${EXEC_SUBDIR}" ]; then - expt_config_str=${expt_config_str}" -EXEC_SUBDIR=\"${EXEC_SUBDIR}\"" - fi - - if [ -n "${EXPT_BASEDIR}" ]; then - expt_config_str=${expt_config_str}" -EXPT_BASEDIR=\"${EXPT_BASEDIR}\"" - fi - - expt_config_str=${expt_config_str}" -EXPT_SUBDIR=\"${EXPT_SUBDIR}\" -# -# Flag specifying whether or not to automatically resubmit the worfklow -# to the batch system via cron and, if so, the frequency (in minutes) of -# resubmission. -# -USE_CRON_TO_RELAUNCH=\"${USE_CRON_TO_RELAUNCH}\" -CRON_RELAUNCH_INTVL_MNTS=\"${CRON_RELAUNCH_INTVL_MNTS}\" -# -# Flags specifying whether to run in debug and verbose mode. -# -DEBUG=\"${DEBUG}\" -VERBOSE=\"${VERBOSE}\"" -# -#----------------------------------------------------------------------- -# -# Append the contents of the current WE2E test's configuration file to -# the experiment configuration string. -# -#----------------------------------------------------------------------- -# - expt_config_str=${expt_config_str}" -# -#----------------------------------------------------------------------- -#----------------------------------------------------------------------- -# The following section is a copy of this WE2E test's configuration file. -# -" - expt_config_str=${expt_config_str}$( config_to_shell_str "${test_config_fp}" ) - expt_config_str=${expt_config_str}" -# -# End of section from this test's configuration file. -#----------------------------------------------------------------------- -#-----------------------------------------------------------------------" - -# -# Set RUN_ENVIR from the $run_envir argument passed to this script -# -if [ ! -z ${run_envir} ]; then - expt_config_str=${expt_config_str}" -# -# Set RUN_ENVIR -# -RUN_ENVIR=${run_envir}" - - RUN_ENVIR=${run_envir} -fi - -# -# Eval DATE_FIRST/LAST_CYCL commands -# -if [[ $DATE_FIRST_CYCL != [0-9]* ]]; then - DATE_FIRST_CYCL=$(eval ${DATE_FIRST_CYCL}) - expt_config_str=${expt_config_str}" -DATE_FIRST_CYCL=${DATE_FIRST_CYCL}" -fi -if [[ $DATE_LAST_CYCL != [0-9]* ]]; then - DATE_LAST_CYCL=$(eval ${DATE_LAST_CYCL}) - expt_config_str=${expt_config_str}" -DATE_LAST_CYCL=${DATE_LAST_CYCL}" -fi - -# -#----------------------------------------------------------------------- -# -# Modifications to the experiment configuration file if the WE2E test -# uses pre-generated grid, orography, or surface climatology files. -# -# If not running one or more of the grid, orography, and surface -# climatology file generation tasks, specify directories in which -# pregenerated versions of these files can be found. 
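In other words, whenever one of the grid, orography, or surface climatology
tasks is switched off, the corresponding *_DIR variable is pointed at a
pregenerated directory for the predefined grid. A Python sketch of that mapping
(the function name is hypothetical; the variable names follow the shell logic
below):

    def pregen_overrides(cfg: dict, pregen_basedir: str) -> dict:
        """Map disabled pre-processing tasks to pregenerated input directories."""
        pregen_dir = f"{pregen_basedir}/{cfg['PREDEF_GRID_NAME']}"
        overrides = {"DOMAIN_PREGEN_BASEDIR": pregen_basedir}
        if not cfg.get("RUN_TASK_MAKE_GRID", True):
            overrides["GRID_DIR"] = pregen_dir
        if not cfg.get("RUN_TASK_MAKE_OROG", True):
            overrides["OROG_DIR"] = pregen_dir
        if not cfg.get("RUN_TASK_MAKE_SFC_CLIMO", True):
            overrides["SFC_CLIMO_DIR"] = pregen_dir
        return overrides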
-# -#----------------------------------------------------------------------- -# - if [ "${RUN_TASK_MAKE_GRID}" = "FALSE" ] || \ - [ "${RUN_TASK_MAKE_OROG}" = "FALSE" ] || \ - [ "${RUN_TASK_MAKE_SFC_CLIMO}" = "FALSE" ]; then - - pregen_basedir=${TEST_PREGEN_BASEDIR:-} - - if [ ! -d "${pregen_basedir:-}" ] ; then - print_err_msg_exit "\ -The base directory (pregen_basedir) in which the pregenerated grid, -orography, and/or surface climatology files are located has not been -specified for this machine (MACHINE): - MACHINE= \"${MACHINE}\"" - fi - - pregen_dir="${pregen_basedir}/${PREDEF_GRID_NAME}" - expt_config_str=${expt_config_str}" -# -# Directory containing the pregenerated grid files. -# -DOMAIN_PREGEN_BASEDIR=\"${pregen_basedir}\"" - - fi -# -# Directory for pregenerated grid files. -# - if [ "${RUN_TASK_MAKE_GRID}" = "FALSE" ]; then - GRID_DIR="${pregen_dir}" - expt_config_str=${expt_config_str}" -# -# Directory containing the pregenerated grid files. -# -GRID_DIR=\"${GRID_DIR}\"" - fi -# -# Directory for pregenerated orography files. -# - if [ "${RUN_TASK_MAKE_OROG}" = "FALSE" ]; then - OROG_DIR="${pregen_dir}" - expt_config_str=${expt_config_str}" -# -# Directory containing the pregenerated orography files. -# -OROG_DIR=\"${OROG_DIR}\"" - fi -# -# Directory for pregenerated surface climatology files. -# - if [ "${RUN_TASK_MAKE_SFC_CLIMO}" = "FALSE" ]; then - SFC_CLIMO_DIR="${pregen_dir}" - expt_config_str=${expt_config_str}" -# -# Directory containing the pregenerated surface climatology files. -# -SFC_CLIMO_DIR=\"${SFC_CLIMO_DIR}\"" - fi -# -#----------------------------------------------------------------------- -# -# Modifications to the experiment configuration file if running the WE2E -# test in NCO mode. -# -#----------------------------------------------------------------------- -# - if [ "${RUN_ENVIR}" = "nco" ]; then -# -# Set RUN and envir. -# - expt_config_str=${expt_config_str}" -# -# Set NCO mode RUN and model_ver -# -RUN=\"\${EXPT_SUBDIR}\" -model_ver="we2e"" - -# -# Set OPSROOT. -# - expt_config_str=${expt_config_str}" -# -# Set NCO mode OPSROOT -# -OPSROOT=\"${opsroot:-$OPSROOT}\"" - - fi -# -#----------------------------------------------------------------------- -# -# Modifications to the experiment configuration file if the WE2E test -# uses user-staged external model files. -# -#----------------------------------------------------------------------- -# - if [ "${USE_USER_STAGED_EXTRN_FILES}" = "TRUE" ]; then - - # Ensure we only check on disk for these files - data_stores="disk" - - extrn_mdl_source_basedir=${TEST_EXTRN_MDL_SOURCE_BASEDIR:-} - if [ ! 
-d "${extrn_mdl_source_basedir:-}" ] ; then - print_err_msg_exit "\ -The base directory (extrn_mdl_source_basedir) in which the user-staged -external model files should be located has not been specified for this -machine (MACHINE): - MACHINE= \"${MACHINE}\"" - fi - EXTRN_MDL_SOURCE_BASEDIR_ICS="${extrn_mdl_source_basedir}/${EXTRN_MDL_NAME_ICS}" - if [ "${EXTRN_MDL_NAME_ICS}" = "FV3GFS" ] ; then - EXTRN_MDL_SOURCE_BASEDIR_ICS="${EXTRN_MDL_SOURCE_BASEDIR_ICS}/${FV3GFS_FILE_FMT_ICS}/\${yyyymmddhh}" - else - EXTRN_MDL_SOURCE_BASEDIR_ICS="${EXTRN_MDL_SOURCE_BASEDIR_ICS}/\${yyyymmddhh}" - fi - - EXTRN_MDL_SOURCE_BASEDIR_LBCS="${extrn_mdl_source_basedir}/${EXTRN_MDL_NAME_LBCS}" - if [ "${EXTRN_MDL_NAME_LBCS}" = "FV3GFS" ] ; then - EXTRN_MDL_SOURCE_BASEDIR_LBCS="${EXTRN_MDL_SOURCE_BASEDIR_LBCS}/${FV3GFS_FILE_FMT_LBCS}/\${yyyymmddhh}" - else - EXTRN_MDL_SOURCE_BASEDIR_LBCS="${EXTRN_MDL_SOURCE_BASEDIR_LBCS}/\${yyyymmddhh}" - fi -# -# Make sure that the forecast length is evenly divisible by the interval -# between the times at which the lateral boundary conditions will be -# specified. -# - rem=$(( 10#${FCST_LEN_HRS} % 10#${LBC_SPEC_INTVL_HRS} )) - if [ "$rem" -ne "0" ]; then - print_err_msg_exit "\ -The forecast length (FCST_LEN_HRS) must be evenly divisible by the lateral -boundary conditions specification interval (LBC_SPEC_INTVL_HRS): - FCST_LEN_HRS = ${FCST_LEN_HRS} - LBC_SPEC_INTVL_HRS = ${LBC_SPEC_INTVL_HRS} - rem = FCST_LEN_HRS%%LBC_SPEC_INTVL_HRS = $rem" - fi - expt_config_str="${expt_config_str} -# -# Locations and names of user-staged external model files for generating -# ICs and LBCs. -# -EXTRN_MDL_SOURCE_BASEDIR_ICS='${EXTRN_MDL_SOURCE_BASEDIR_ICS}' -EXTRN_MDL_FILES_ICS=( ${EXTRN_MDL_FILES_ICS[@]} ) -EXTRN_MDL_SOURCE_BASEDIR_LBCS='${EXTRN_MDL_SOURCE_BASEDIR_LBCS}' -EXTRN_MDL_FILES_LBCS=( ${EXTRN_MDL_FILES_LBCS[@]} ) -EXTRN_MDL_DATA_STORES=\"$data_stores\"" - - fi -# -#----------------------------------------------------------------------- -# -# Check that MET directories have been set appropriately, if needed. -# -#----------------------------------------------------------------------- -# - if [ "${RUN_TASK_VX_GRIDSTAT}" = "TRUE" ] || \ - [ "${RUN_TASK_VX_POINTSTAT}" = "TRUE" ] || \ - [ "${RUN_TASK_VX_ENSGRID}" = "TRUE" ] || \ - [ "${RUN_TASK_VX_ENSPOINT}" = "TRUE" ]; then - - check=0 - if [ ! -d ${MET_INSTALL_DIR} ] ; then - print_info_msg "\ - The MET installation location must be set for this machine! - MET_INSTALL_DIR = \"${MET_INSTALL_DIR}\"" - check=1 - fi - - if [ ! -d ${METPLUS_PATH} ] ; then - print_info_msg "\ - The MET+ installation location must be set for this machine! - METPLUS_PATH = \"${METPLUS_PATH}\"" - check=1 - fi - - if [ -z ${MET_BIN_EXEC} ] ; then - print_info_msg "\ - The MET execution command must be set for this machine! - MET_BIN_EXEC = \"${MET_BIN_EXEC}\"" - check=1 - fi - - if [ ! -d ${CCPA_OBS_DIR} ] ; then - print_info_msg "\ - The CCPA observation location must be set for this machine! - CCPA_OBS_DIR = \"${CCPA_OBS_DIR}\"" - check=1 - fi - - if [ ! -d ${MRMS_OBS_DIR} ] ; then - print_info_msg "\ - The MRMS observation location must be set for this machine! - MRMS_OBS_DIR = \"${MRMS_OBS_DIR}\"" - check=1 - fi - - if [ ! -d ${NDAS_OBS_DIR} ] ; then - print_info_msg "\ - The NDAS observation location must be set for this machine! 
- NDAS_OBS_DIR = \"${NDAS_OBS_DIR}\"" - check=1 - fi - - if [ ${check} = 1 ] ; then - print_err_msg_exit "\ - Please set MET variables in the machine file for \ - MACHINE = \"${MACHINE}\"" - fi - - fi -# -#----------------------------------------------------------------------- -# -# On some machines (e.g. cheyenne), some tasks often require multiple -# tries before they succeed. To make it more convenient to run the WE2E -# tests on these machines without manual intervention, change the number -# of attempts for such tasks on those machines to be more than one. -# -#----------------------------------------------------------------------- -# - add_maxtries="FALSE" - - if [ "$MACHINE" = "HERA" ]; then - add_maxtries="TRUE" - MAXTRIES_MAKE_ICS="2" - MAXTRIES_MAKE_LBCS="2" - MAXTRIES_RUN_POST="2" - elif [ "$MACHINE" = "CHEYENNE" ]; then - add_maxtries="TRUE" - MAXTRIES_MAKE_SFC_CLIMO="3" - MAXTRIES_MAKE_ICS="5" - MAXTRIES_MAKE_LBCS="10" - MAXTRIES_RUN_POST="10" - fi - - if [ "${add_maxtries}" = "TRUE" ]; then - - expt_config_str=${expt_config_str}" -# -# Maximum number of attempts at running each task. -# -MAXTRIES_MAKE_GRID=\"${MAXTRIES_MAKE_GRID}\" -MAXTRIES_MAKE_OROG=\"${MAXTRIES_MAKE_OROG}\" -MAXTRIES_MAKE_SFC_CLIMO=\"${MAXTRIES_MAKE_SFC_CLIMO}\" -MAXTRIES_GET_EXTRN_ICS=\"${MAXTRIES_GET_EXTRN_ICS}\" -MAXTRIES_GET_EXTRN_LBCS=\"${MAXTRIES_GET_EXTRN_LBCS}\" -MAXTRIES_MAKE_ICS=\"${MAXTRIES_MAKE_ICS}\" -MAXTRIES_MAKE_LBCS=\"${MAXTRIES_MAKE_LBCS}\" -MAXTRIES_RUN_FCST=\"${MAXTRIES_RUN_FCST}\" -MAXTRIES_RUN_POST=\"${MAXTRIES_RUN_POST}\"" - - fi -# -#----------------------------------------------------------------------- -# Write content to a temporary config file -#----------------------------------------------------------------------- -# - temp_file="$PWD/_config_temp_.sh" - expt_config_fp="${temp_file}" - printf "%s" "${expt_config_str}" > "${expt_config_fp}" -# -#----------------------------------------------------------------------- -# -# The following are changes that need to be made directly to the -# experiment configuration file created above (as opposed to the -# experiment configuration string expt_config_str) because they involve -# resetting of values that have already been set in the experiment -# configuration file. -# -# If EXTRN_MDL_SYSBASEDIR_ICS has been specified in the current WE2E -# test's base configuration file, it must be set to one of the following: -# -# 1) The string "set_to_non_default_location_in_testing_script" in order -# to allow this script to set it to a valid location depending on the -# machine and external model (for ICs). -# -# 2) To an existing directory. If it is set to a directory, then this -# script ensures that the directory exists (via the check below). -# -#----------------------------------------------------------------------- -# - if [ -n "${EXTRN_MDL_SYSBASEDIR_ICS}" ]; then - - if [ "${EXTRN_MDL_SYSBASEDIR_ICS}" = "set_to_non_default_location_in_testing_script" ]; then - - EXTRN_MDL_SYSBASEDIR_ICS="${TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS:-}" - - if [ -z "${EXTRN_MDL_SYSBASEDIR_ICS}" ]; then - print_err_msg_exit "\ -A non-default location for EXTRN_MDL_SYSBASEDIR_ICS for testing purposes -has not been specified for this machine (MACHINE) and external model for -initial conditions (EXTRN_MDL_NAME_ICS) combination: - MACHINE= \"${MACHINE}\" - EXTRN_MDL_NAME_ICS = \"${EXTRN_MDL_NAME_ICS}\"" - fi - - # Maintain any templates in EXTRN_MDL_SYSBASEDIR_ICS -- don't use - # quotes. 
- set_bash_param "${expt_config_fp}" \ - "EXTRN_MDL_SYSBASEDIR_ICS" ${EXTRN_MDL_SYSBASEDIR_ICS} - - fi - - # Check the base directory for the specified location. - if [ ! -d "$(dirname ${EXTRN_MDL_SYSBASEDIR_ICS%%\$*})" ]; then - print_err_msg_exit "\ -The non-default location specified by EXTRN_MDL_SYSBASEDIR_ICS does not -exist or is not a directory: - EXTRN_MDL_NAME_ICS = \"${EXTRN_MDL_NAME_ICS}\"" - fi - - - fi -# -#----------------------------------------------------------------------- -# -# Same as above but for EXTRN_MDL_SYSBASEDIR_LBCS. -# -#----------------------------------------------------------------------- -# - if [ -n "${EXTRN_MDL_SYSBASEDIR_LBCS}" ]; then - - if [ "${EXTRN_MDL_SYSBASEDIR_LBCS}" = "set_to_non_default_location_in_testing_script" ]; then - - EXTRN_MDL_SYSBASEDIR_LBCS="${TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS:-}" - - if [ -z "${EXTRN_MDL_SYSBASEDIR_LBCS}" ]; then - print_err_msg_exit "\ -A non-default location for EXTRN_MDL_SYSBASEDIR_LBCS for testing purposes -has not been specified for this machine (MACHINE) and external model for -initial conditions (EXTRN_MDL_NAME_LBCS) combination: - MACHINE= \"${MACHINE}\" - EXTRN_MDL_NAME_LBCS = \"${EXTRN_MDL_NAME_LBCS}\"" - fi - - # Maintain any templates in EXTRN_MDL_SYSBASEDIR_ICS -- don't use - # quotes. - set_bash_param "${expt_config_fp}" \ - "EXTRN_MDL_SYSBASEDIR_LBCS" ${EXTRN_MDL_SYSBASEDIR_LBCS} - - fi - - # Check the base directory for the specified location. - if [ ! -d "$(dirname ${EXTRN_MDL_SYSBASEDIR_LBCS%%\$*})" ]; then - print_err_msg_exit "\ -The non-default location specified by EXTRN_MDL_SYSBASEDIR_LBCS does not -exist or is not a directory: - EXTRN_MDL_NAME_LBCS = \"${EXTRN_MDL_NAME_LBCS}\"" - fi - - - fi -# -#----------------------------------------------------------------------- -# -# Set the full path to the configuration file that the experiment -# generation script reads in. Then write the contents of expt_config_str -# to that file. -# -#----------------------------------------------------------------------- -# - expt_config_fp="$USHdir/${EXPT_CONFIG_FN}" - ext="${EXPT_CONFIG_FN##*.}" - config_to_str "${ext}" "${temp_file}" -t "$USHdir/config_defaults.yaml" >"${expt_config_fp}" - rm -rf "${temp_file}" -# -#----------------------------------------------------------------------- -# -# Call the experiment generation script to generate an experiment -# directory and a rocoto workflow XML for the current WE2E test to run. -# -#----------------------------------------------------------------------- -# - $USHdir/generate_FV3LAM_wflow.py - - if [ $? != 0 ] ; then - print_err_msg_exit "\ -Could not generate an experiment for the test specified by test_name: - test_name = \"${test_name}\"" - fi - -done - -# Print notes about monitoring/running jobs if use_cron_to_relaunch = FALSE -topdir=${scrfunc_dir%/*/*/*} -expt_dirs_fullpath="${topdir}/expt_dirs" - -echo " - ======================================================================== - ======================================================================== - - All experiments have been generated in the directory - ${expt_dirs_fullpath} - - ======================================================================== - ======================================================================== -" - -if [ "${use_cron_to_relaunch,,}" = "false" ]; then - echo " - -The variable 'use_cron_to_relaunch' has been set to FALSE. Jobs will not be automatically run via crontab. 
- -You can run each task manually in the experiment directory: -(${expt_dirs_fullpath}) - -Or you can use the 'run_srw_tests.py' script in the ush/ directory: - - cd $USHdir - ./run_srw_tests.py -e=${expt_dirs_fullpath} - -" -fi - -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script or -# function. -# -#----------------------------------------------------------------------- -# -{ restore_shell_opts; } > /dev/null 2>&1 - diff --git a/tests/WE2E/setup_WE2E_tests.sh b/tests/WE2E/setup_WE2E_tests.sh index c031ad89b2..b617709327 100755 --- a/tests/WE2E/setup_WE2E_tests.sh +++ b/tests/WE2E/setup_WE2E_tests.sh @@ -1,5 +1,10 @@ #!/usr/bin/env bash -[ -n "$HOME" ] && exec -c "$0" "$@" + +# `exec -c` runs this script with clean environment; this avoids some problems +# with double-loading conda environments. Since we do need $HOME to be set for +# rocoto to run properly, pass it as an argument and export it later + +[ -n "$HOME" ] && exec -c "$0" "$HOME" "$@" #---------------------------------------------------------------------- # Wrapper for the automation of UFS Short Range Weather App Workflow @@ -7,7 +12,7 @@ # # The wrapper loads the appropriate workflow environment for the # machine, and sets the machine test suite file before invoking the -# run_WE2E_tests.sh. +# run_WE2E_tests.py script. # # The script is dependent on a successful build of this repo using the # tests/build.sh script in the ufs-srweather-app repository. The UFS @@ -26,14 +31,15 @@ function usage { echo - echo "Usage: $0 machine account [compiler] [test_type] [others] | -h" + echo "Usage: $0 machine account [compiler] [tests] [others] | -h" echo - echo " machine [required] is one of: ${machines[@]}" - echo " account [required] case sensitive name of the user-specific slurm account" - echo " compiler [optional] compiler used to build binaries (intel or gnu)" - echo " test_type [optional] test type: fundamental or comprehensive or all or any other name" - echo " others [optional] All other arguments are forwarded to run_WE2E_tests.sh" - echo " -h display this help" + echo " machine [required] is one of: ${machines[@]}" + echo " account [required] case sensitive name of the user-specific slurm account" + echo " compiler [optional] compiler used to build binaries (intel or gnu)" + echo " tests [optional] tests to run: can be a suite (all|comprehensive|fundamental) + a filename, or a test name" + echo " others [optional] All other arguments are forwarded to run_WE2E_tests.py" + echo " -h display this help" echo exit 1 @@ -42,43 +48,46 @@ function usage { machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud ) if [ "$1" = "-h" ] ; then usage ; fi -[[ $# -le 1 ]] && usage +[[ $# -le 2 ]] && usage -machine=${1,,} -account=$2 -compiler=${3:-intel} -test_type=${4:-fundamental} +homedir=$1 +machine=${2,,} +account=$3 +compiler=${4:-intel} +tests=${5:-fundamental} #---------------------------------------------------------------------- # Set some default options, if user did not pass them #---------------------------------------------------------------------- opts= if [[ "$*" != *"debug"* ]]; then - opts="${opts} debug=TRUE" + opts="${opts} --debug" fi if [[ "$*" != *"verbose"* ]]; then - opts="${opts} verbose=TRUE" + opts="${opts} --verbose" fi if [[ "$*" != *"cron_relaunch_intvl_mnts"* ]]; then - opts="${opts} cron_relaunch_intvl_mnts=4" + opts="${opts} --cron_relaunch_intvl_mnts=4" fi if [[ "$*" != *"exec_subdir"* ]]; 
then - opts="${opts} exec_subdir=install_${compiler}/exec" + opts="${opts} --exec_subdir=install_${compiler}/exec" fi #----------------------------------------------------------------------- # Run E2E Tests #----------------------------------------------------------------------- +# Export HOME environment variable; needed for rocoto +export HOME=$homedir # Load Python Modules source ../../ush/load_modules_wflow.sh ${machine} # Run the E2E Workflow tests -./run_WE2E_tests.sh \ - machine=${machine} \ - account=${account} \ - compiler=${compiler} \ - test_type=${test_type} \ +./run_WE2E_tests.py \ + --machine=${machine} \ + --account=${account} \ + --compiler=${compiler} \ + --tests=${tests} \ ${opts} \ - "${@:5}" + "${@:6}" diff --git a/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml b/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml index 38fbbe5af6..3a704b3c22 100644 --- a/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml +++ b/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml @@ -10,8 +10,8 @@ platform: workflow: CCPP_PHYS_SUITE: FV3_GFS_2017_gfdlmp PREDEF_GRID_NAME: RRFS_CONUS_25km - DATE_FIRST_CYCL: $DATE_UTIL --utc --date="2 days ago" +%Y%m%d00 - DATE_LAST_CYCL: $DATE_UTIL --utc --date="2 days ago" +%Y%m%d00 + DATE_FIRST_CYCL: '{{ 2|days_ago }}' + DATE_LAST_CYCL: '{{ 2|days_ago }}' FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename task_get_extrn_ics: diff --git a/tests/WE2E/test_configs/wflow_features/config.specify_template_filenames.yaml b/tests/WE2E/test_configs/wflow_features/config.specify_template_filenames.yaml index 462de85819..2c39bc388e 100644 --- a/tests/WE2E/test_configs/wflow_features/config.specify_template_filenames.yaml +++ b/tests/WE2E/test_configs/wflow_features/config.specify_template_filenames.yaml @@ -5,11 +5,11 @@ metadata: user: RUN_ENVIR: community workflow: - DATA_TABLE_TMPL_FN: data_table + DATA_TABLE_FN: data_table DIAG_TABLE_TMPL_FN: diag_table.FV3_GFS_v15p2 FIELD_TABLE_TMPL_FN: field_table.FV3_GFS_v15p2 - MODEL_CONFIG_TMPL_FN: model_configure - NEMS_CONFIG_TMPL_FN: nems.configure + MODEL_CONFIG_FN: model_configure + NEMS_CONFIG_FN: nems.configure CCPP_PHYS_SUITE: FV3_GFS_v15p2 PREDEF_GRID_NAME: RRFS_CONUS_25km DATE_FIRST_CYCL: '2019070100' diff --git a/tests/WE2E/utils.py b/tests/WE2E/utils.py new file mode 100755 index 0000000000..1a6d4aae12 --- /dev/null +++ b/tests/WE2E/utils.py @@ -0,0 +1,568 @@ +#!/usr/bin/env python3 +""" +A collection of utilities used by the various WE2E scripts +""" +import os +import re +import sys +import logging +import subprocess +import sqlite3 +import glob +from textwrap import dedent +from datetime import datetime +from contextlib import closing +from multiprocessing import Pool + +sys.path.append("../../ush") + +from calculate_cost import calculate_cost +from python_utils import ( + cfg_to_yaml_str, + flatten_dict, + load_config_file, + load_shell_config +) + +REPORT_WIDTH = 100 +EXPT_COLUMN_WIDTH = 65 +TASK_COLUMN_WIDTH = 40 +def print_WE2E_summary(expts_dict: dict, debug: bool = False): + """Function that creates a summary for the specified experiment + + Args: + expts_dict (dict): A dictionary containing the information needed to run + one or more experiments. 
See example file WE2E_tests.yaml
+        debug       (bool): [optional] Enable extra output for debugging
+    Returns:
+        None
+    """
+
+    # Create summary table as list of strings
+    summary = []
+    summary.append('-'*REPORT_WIDTH)
+    summary.append(f'Experiment name {" "*(EXPT_COLUMN_WIDTH-17)} | Status | Core hours used ')
+    summary.append('-'*REPORT_WIDTH)
+    total_core_hours = 0
+    statuses = []
+    expt_details = []
+    for expt in expts_dict:
+        statuses.append(expts_dict[expt]["status"])
+        ch = 0
+        expt_details.append('')
+        expt_details.append('-'*REPORT_WIDTH)
+        expt_details.append(f'Detailed summary of experiment {expt}')
+        expt_details.append(f"in directory {expts_dict[expt]['expt_dir']}")
+        expt_details.append(f'{" "*TASK_COLUMN_WIDTH}| Status | Walltime | Core hours used')
+        expt_details.append('-'*REPORT_WIDTH)
+
+        for task in expts_dict[expt]:
+            # Skip non-task entries
+            if task in ["expt_dir","status"]:
+                continue
+            status = expts_dict[expt][task]["status"]
+            walltime = expts_dict[expt][task]["walltime"]
+            expt_details.append(f'{task[:TASK_COLUMN_WIDTH]:<{TASK_COLUMN_WIDTH}s} {status:<12s} {walltime:>10.1f}')
+            if "core_hours" in expts_dict[expt][task]:
+                task_ch = expts_dict[expt][task]["core_hours"]
+                ch += task_ch
+                expt_details[-1] = f'{expt_details[-1]} {task_ch:>13.2f}'
+            else:
+                expt_details[-1] = f'{expt_details[-1]} -'
+        expt_details.append('-'*REPORT_WIDTH)
+        expt_details.append(f'Total {" "*(TASK_COLUMN_WIDTH - 6)} {statuses[-1]:<12s} {" "*11} {ch:>13.2f}')
+        summary.append(f'{expt[:EXPT_COLUMN_WIDTH]:<{EXPT_COLUMN_WIDTH}s} {statuses[-1]:<12s} {ch:>13.2f}')
+        total_core_hours += ch
+    if "ERROR" in statuses:
+        total_status = "ERROR"
+    elif "RUNNING" in statuses:
+        total_status = "RUNNING"
+    elif "QUEUED" in statuses:
+        total_status = "QUEUED"
+    elif "DEAD" in statuses:
+        total_status = "DEAD"
+    elif "COMPLETE" in statuses:
+        total_status = "COMPLETE"
+    else:
+        total_status = "UNKNOWN"
+    summary.append('-'*REPORT_WIDTH)
+    summary.append(f'Total {" "*(EXPT_COLUMN_WIDTH - 6)} {total_status:<12s} {total_core_hours:>13.2f}')
+
+    # Print summary to screen
+    for line in summary:
+        print(line)
+
+    # Print summary and details to file
+    summary_file = f'WE2E_summary_{datetime.now().strftime("%Y%m%d%H%M%S")}.txt'
+    print(f"\nDetailed summary written to {summary_file}\n")
+
+    with open(summary_file, 'w', encoding="utf-8") as f:
+        for line in summary:
+            f.write(f"{line}\n")
+        f.write("\nDetailed summary of each experiment:\n")
+        for line in expt_details:
+            f.write(f"{line}\n")
+
+def create_expts_dict(expt_dir: str) -> tuple:
+    """
+    Function takes in a directory, searches that directory for subdirectories containing
+    experiments, and creates a skeleton dictionary that can be filled out by update_expt_status()
+
+    Args:
+        expt_dir (str) : Experiment directory
+    Returns:
+        tuple : The name of the summary file to write and the experiment dictionary
+    """
+    contents = os.listdir(expt_dir)
+
+    expts_dict = dict()
+    for item in contents:
+        # Look for FV3LAM_wflow.xml to indicate directories with experiments in them
+        fullpath = os.path.join(expt_dir, item)
+        if not os.path.isdir(fullpath):
+            continue
+        xmlfile = os.path.join(expt_dir, item, 'FV3LAM_wflow.xml')
+        if os.path.isfile(xmlfile):
+            expts_dict[item] = dict()
+            expts_dict[item].update({"expt_dir": os.path.join(expt_dir,item)})
+            expts_dict[item].update({"status": "CREATED"})
+        else:
+            logging.debug(f'Skipping directory {item}, experiment XML file not found')
+            continue
+        # Update the experiment dictionary
+        logging.debug(f"Reading status of experiment {item}")
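+        # Positional arguments here are refresh=True (read the status even for
+        # experiments already marked finished), debug=False, and submit=False
+        # (only read the database; do not advance the workflow with rocotorun)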
+        update_expt_status(expts_dict[item],item,True,False,False)
+    summary_file = f'WE2E_tests_{datetime.now().strftime("%Y%m%d%H%M%S")}.yaml'
+
+    return summary_file, expts_dict
+
+def calculate_core_hours(expts_dict: dict) -> dict:
+    """
+    Function takes in an experiment dictionary, reads the var_defns file for necessary information,
+    and calculates the core hours used by each task, updating expts_dict with this info
+
+    Args:
+        expts_dict (dict): A dictionary containing the information needed to run
+                           one or more experiments. See example file WE2E_tests.yaml
+    Returns:
+        dict : Experiments dictionary updated with core hours
+    """
+
+    for expt in expts_dict:
+        # Read variable definitions file
+        vardefs_file = os.path.join(expts_dict[expt]["expt_dir"],"var_defns.sh")
+        if not os.path.isfile(vardefs_file):
+            logging.warning(f"\nWARNING: For experiment {expt}, variable definitions file")
+            logging.warning(f"{vardefs_file}\ndoes not exist!\n\nDropping experiment from summary")
+            continue
+        logging.debug(f'Reading variable definitions file {vardefs_file}')
+        vardefs = load_shell_config(vardefs_file)
+        vdf = flatten_dict(vardefs)
+        cores_per_node = vdf["NCORES_PER_NODE"]
+        for task in expts_dict[expt]:
+            # Skip non-task entries
+            if task in ["expt_dir","status"]:
+                continue
+            # The cycle is the last 12 characters; the task name is the rest (minus the
+            # separating underscore)
+            taskname = task[:-13]
+            # Handle task names that have ensemble and/or fhr info appended, with regex
+            taskname = re.sub(r'_mem\d{3}', '', taskname)
+            taskname = re.sub(r'_f\d{3}', '', taskname)
+            nnodes_var = f'NNODES_{taskname.upper()}'
+            if nnodes_var in vdf:
+                nnodes = vdf[nnodes_var]
+                # Users are charged for full use of nodes, so core hours = CPN * nodes * time in hrs
+                core_hours = cores_per_node * nnodes * expts_dict[expt][task]['walltime'] / 3600
+                expts_dict[expt][task]['exact_count'] = True
+            else:
+                # If we can't find the number of nodes, assume full usage (may undercount)
+                core_hours = expts_dict[expt][task]['cores'] * \
+                             expts_dict[expt][task]['walltime'] / 3600
+                expts_dict[expt][task]['exact_count'] = False
+            expts_dict[expt][task]['core_hours'] = round(core_hours,2)
+    return expts_dict
+
+
+def write_monitor_file(monitor_file: str, expts_dict: dict):
+    """Write the experiment dictionary to the monitor (YAML) file"""
+    try:
+        with open(monitor_file,"w", encoding="utf-8") as f:
+            f.write("### WARNING ###\n")
+            f.write("### THIS FILE IS AUTO-GENERATED AND REGULARLY OVERWRITTEN BY WORKFLOW SCRIPTS\n")
+            f.write("### EDITS MAY RESULT IN MISBEHAVIOR OF EXPERIMENTS RUNNING\n")
+            f.writelines(cfg_to_yaml_str(expts_dict))
+    except Exception:
+        logging.fatal("\n********************************\n")
+        logging.fatal("WARNING WARNING WARNING\n")
+        logging.fatal(f"Failure occurred while writing monitor file {monitor_file}")
+        logging.fatal("File may be corrupt or invalid for re-run!!")
+        logging.fatal("\n********************************\n")
+        raise
+
+
+def update_expt_status(expt: dict, name: str, refresh: bool = False, debug: bool = False,
+                       submit: bool = True) -> dict:
+    """
+    This function reads the dictionary showing the location of a given experiment, runs a
+    `rocotorun` command to update the experiment (running new jobs and updating the status of
+    previously submitted ones), and reads the rocoto database file to update the status of
+    each job for that experiment in the experiment dictionary.
+
+    The function then uses a simple set of rules to combine the statuses of every task
+    into a useful "status" for the whole experiment, and returns the updated experiment dictionary.
+
+    Experiment "status" levels explained:
+    CREATED: The experiments have been created, but the monitor script has not yet processed them.
+        This is immediately overwritten at the beginning of the "monitor_jobs" function, so we
+        should never see this status in this function. Included just for completeness' sake.
+    SUBMITTING: All jobs are in status SUBMITTING or SUCCEEDED. This is a normal state; we will
+        continue to monitor this experiment.
+    DYING: One or more tasks have died (status "DEAD"), so this experiment has had an error.
+        We will continue to monitor this experiment until all tasks are either status DEAD or
+        status SUCCEEDED (see next entry).
+    DEAD: One or more tasks are at status DEAD, and the rest are either DEAD or SUCCEEDED. We
+        will no longer monitor this experiment.
+    ERROR: Could not read the rocoto database file. This will require manual intervention to
+        solve, so we will no longer monitor this experiment.
+    RUNNING: One or more jobs are at status RUNNING, and the rest are either status QUEUED,
+        SUBMITTED, or SUCCEEDED. This is a normal state; we will continue to monitor this
+        experiment.
+    QUEUED: One or more jobs are at status QUEUED, and some others may be at status SUBMITTED or
+        SUCCEEDED. This is a normal state; we will continue to monitor this experiment.
+    SUCCEEDED: All jobs are status SUCCEEDED; we will monitor for one more cycle in case there are
+        unsubmitted jobs remaining.
+    COMPLETE: All jobs are status SUCCEEDED, and we have monitored this job for an additional cycle
+        to ensure there are no un-submitted jobs. We will no longer monitor this experiment.
+
+    Args:
+        expt    (dict): A dictionary containing the information for an individual experiment, as
+                        described in the main monitor_jobs() function.
+        name     (str): Name of the experiment; used for logging only
+        refresh (bool): If true, this flag will check an experiment status even if it is listed
+                        as DEAD, ERROR, or COMPLETE. Used for initial checks for experiments
+                        that may have been restarted.
+        debug   (bool): Will capture all output from rocotorun. This will allow information such
+                        as job cards and job submit messages to appear in the log files, but can
+                        slow down the process drastically.
+        submit  (bool): In addition to reading the rocoto database, the script will advance the
+                        workflow by calling rocotorun. If simply generating a report, set this
+                        to False
+    Returns:
+        dict: The updated experiment dictionary.
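+
+    Example (illustrative only; the experiment directory shown is hypothetical):
+        expt = {"expt_dir": "/path/to/expt_dirs/my_test", "status": "CREATED"}
+        expt = update_expt_status(expt, "my_test", refresh=True, submit=False)
+        # expt["status"] is now one of the levels described above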
+ """ + + #If we are no longer tracking this experiment, return unchanged + if (expt["status"] in ['DEAD','ERROR','COMPLETE']) and not refresh: + return expt + # Update experiment, read rocoto database + rocoto_db = f"{expt['expt_dir']}/FV3LAM_wflow.db" + rocoto_xml = f"{expt['expt_dir']}/FV3LAM_wflow.xml" + if submit: + if refresh: + logging.info(f"Updating database for experiment {name}") + if debug: + rocotorun_cmd = ["rocotorun", f"-w {rocoto_xml}", f"-d {rocoto_db}", "-v 10"] + p = subprocess.run(rocotorun_cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, text=True) + logging.debug(p.stdout) + + #Run rocotorun again to get around rocotobqserver proliferation issue + p = subprocess.run(rocotorun_cmd, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, text=True) + logging.debug(p.stdout) + else: + rocotorun_cmd = ["rocotorun", f"-w {rocoto_xml}", f"-d {rocoto_db}"] + subprocess.run(rocotorun_cmd) + #Run rocotorun again to get around rocotobqserver proliferation issue + subprocess.run(rocotorun_cmd) + + logging.debug(f"Reading database for experiment {name}, updating experiment dictionary") + try: + # This section of code queries the "job" table of the rocoto database, returning a list + # of tuples containing the taskname, cycle, and state of each job respectively + with closing(sqlite3.connect(rocoto_db)) as connection: + with closing(connection.cursor()) as cur: + db = cur.execute('SELECT taskname,cycle,state,cores,duration from jobs').fetchall() + except: + # Some platforms (including Hera) can have a problem with rocoto jobs not submitting + # properly due to build-ups of background processes. This will resolve over time as + # rocotorun continues to be called, so let's only treat this as an error if we are + # past the first initial iteration of job submissions + if not refresh: + logging.warning(f"Unable to read database {rocoto_db}\nCan not track experiment {name}") + expt["status"] = "ERROR" + + return expt + + for task in db: + # For each entry from rocoto database, store that task's info under a dictionary key named + # TASKNAME_CYCLE; Cycle comes from the database in Unix Time (seconds), so convert to + # human-readable + cycle = datetime.utcfromtimestamp(task[1]).strftime('%Y%m%d%H%M') + if f"{task[0]}_{cycle}" not in expt: + expt[f"{task[0]}_{cycle}"] = dict() + expt[f"{task[0]}_{cycle}"]["status"] = task[2] + expt[f"{task[0]}_{cycle}"]["cores"] = task[3] + expt[f"{task[0]}_{cycle}"]["walltime"] = task[4] + + statuses = list() + for task in expt: + # Skip non-task entries + if task in ["expt_dir","status"]: + continue + statuses.append(expt[task]["status"]) + + if "DEAD" in statuses: + still_live = ["RUNNING", "SUBMITTING", "QUEUED", "FAILED"] + if any(status in still_live for status in statuses): + logging.debug(f'DEAD job in experiment {name}; continuing to track until all jobs are '\ + 'complete') + expt["status"] = "DYING" + else: + expt["status"] = "DEAD" + return expt + elif "RUNNING" in statuses: + expt["status"] = "RUNNING" + elif "QUEUED" in statuses: + expt["status"] = "QUEUED" + elif "FAILED" in statuses or "SUBMITTING" in statuses: + # Job in "FAILED" status means it will be retried + expt["status"] = "SUBMITTING" + elif "SUCCEEDED" in statuses: + # If all task statuses are "SUCCEEDED", set the experiment status to "SUCCEEDED". This + # will trigger a final check using rocotostat to make sure there are no remaining un- + # started tests. 
+
+def update_expt_status_parallel(expts_dict: dict, procs: int, refresh: bool = False,
+                                debug: bool = False) -> dict:
+    """
+    Updates an entire set of experiments in parallel, which can drastically speed up the process
+    given enough parallel processes. Given a dictionary of experiments, this function passes each
+    individual experiment dictionary to update_expt_status(), using Python's multiprocessing
+    starmap functionality to perform the updates in parallel.
+
+    Args:
+        expts_dict (dict): A dictionary containing information for all experiments.
+        procs       (int): The number of parallel processes.
+        refresh    (bool): "Refresh" flag to pass to update_expt_status().
+        debug      (bool): If True, capture all output from rocotorun. This allows information
+                           such as job cards and job submit messages to appear in the log files,
+                           but can slow down the process drastically.
+
+    Returns:
+        dict: The updated dictionary of experiment dictionaries.
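+
+    Example (a minimal sketch; the experiment names and paths are hypothetical):
+
+        expts_dict = {
+            "test_a": {"expt_dir": "/path/to/expt_dirs/test_a", "status": "CREATED"},
+            "test_b": {"expt_dir": "/path/to/expt_dirs/test_b", "status": "CREATED"},
+        }
+        expts_dict = update_expt_status_parallel(expts_dict, procs=2, refresh=True)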
+    """
+
+    # Build a list of argument tuples to pass to starmap
+    args = []
+    for expt in expts_dict:
+        args.append((expts_dict[expt], expt, refresh, debug))
+
+    # Call update_expt_status() in parallel
+    with Pool(processes=procs) as pool:
+        output = pool.starmap(update_expt_status, args)
+
+    # Update the dictionary with the output from all calls to update_expt_status()
+    for expt, updated in zip(expts_dict, output):
+        expts_dict[expt] = updated
+
+    return expts_dict
+
+
+def print_test_info(txtfile: str = "WE2E_test_info.txt") -> None:
+    """Writes a pipe-delimited ( | ) text file containing summaries of each test defined by a
+    config file in test_configs/*
+
+    Args:
+        txtfile (str): File name for the test details file.
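+
+    Example (a minimal sketch; assumes the function is called from the tests/WE2E directory,
+    where the test_configs/ directory lives):
+
+        print_test_info("WE2E_test_info.txt")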
+    """
+
+    testfiles = glob.glob('test_configs/**/config*.yaml', recursive=True)
+    testdict = dict()
+    links = dict()
+    for testfile in testfiles:
+        # Calculate the relative cost of the test based on its config settings
+        cost_array = calculate_cost(testfile)
+        cost = cost_array[1] / cost_array[3]
+        # Decompose the full file path into relevant bits; the test name is the file name with
+        # the "config." prefix and ".yaml" suffix stripped
+        pathname, filename = os.path.split(testfile)
+        testname = filename[7:-5]
+        dirname = os.path.basename(os.path.normpath(pathname))
+        if os.path.islink(testfile):
+            targettestfile = os.readlink(testfile)
+            targetfilename = os.path.basename(targettestfile)
+            targettestname = targetfilename[7:-5]
+            links[testname] = (testname, dirname, targettestname)
+        else:
+            testdict[testname] = load_config_file(testfile)
+            testdict[testname]["directory"] = dirname
+            testdict[testname]["cost"] = cost
+            # Calculate the number of forecasts for a cycling run
+            if testdict[testname]['workflow']["DATE_FIRST_CYCL"] != \
+               testdict[testname]['workflow']["DATE_LAST_CYCL"]:
+                begin = datetime.strptime(testdict[testname]['workflow']["DATE_FIRST_CYCL"],
+                                          '%Y%m%d%H')
+                end = datetime.strptime(testdict[testname]['workflow']["DATE_LAST_CYCL"],
+                                        '%Y%m%d%H')
+                diff = end - begin
+                diffh = diff.total_seconds() // 3600
+                nf = diffh // testdict[testname]['workflow']["INCR_CYCL_FREQ"]
+                testdict[testname]["num_fcsts"] = nf
+            else:
+                testdict[testname]["num_fcsts"] = 1
+
+    # For each found link, add its info to the appropriate test dictionary entry
+    for link in links.values():
+        alt_testname, alt_dirname, link_name = link
+        testdict[link_name]["alternate_name"] = alt_testname
+        testdict[link_name]["alternate_directory_name"] = alt_dirname
+
+    # Write the output file
+    with open(txtfile, 'w', encoding="utf-8") as f:
+        # Field delimiter character
+        d = "\" | \""
+        txt_output = ['"Test Name']
+        txt_output.append(f'(Subdirectory){d}Alternate Test Names')
+        txt_output.append(f'(Subdirectories){d}Test Purpose/Description{d}Relative Cost of Running Dynamics')
+        txt_output.append(f'(1 corresponds to running a 6-hour forecast on the RRFS_CONUS_25km predefined grid using the default time step){d}PREDEF_GRID_NAME{d}CCPP_PHYS_SUITE{d}EXTRN_MDL_NAME_ICS{d}EXTRN_MDL_NAME_LBCS{d}DATE_FIRST_CYCL{d}DATE_LAST_CYCL{d}INCR_CYCL_FREQ{d}FCST_LEN_HRS{d}DT_ATMOS{d}LBC_SPEC_INTVL_HRS{d}NUM_ENS_MEMBERS')
+
+        for line in txt_output:
+            f.write(f"{line}\n")
+        for expt in testdict:
+            f.write(f"\"{expt}\n(")
+            f.write(f"{testdict[expt]['directory']}){d}")
+            if "alternate_name" in testdict[expt]:
+                f.write(f"{testdict[expt]['alternate_name']}\n"
+                        f"({testdict[expt]['alternate_directory_name']}){d}")
+            else:
+                f.write(f"{d}\n")
+            desc = testdict[expt]['metadata']['description'].splitlines()
+            for line in desc[:-1]:
+                f.write(f"    {line}\n")
+            f.write(f"    {desc[-1]}")
+            # Write the test's relative cost and number of forecasts (for cycling runs)
+            f.write(f"{d}'{round(testdict[expt]['cost'],2)}{d}'{round(testdict[expt]['num_fcsts'])}")
+            # Bundle various variables with their corresponding sections for more compact coding
+            key_pairs = [ ('workflow', 'PREDEF_GRID_NAME'),
+                          ('workflow', 'CCPP_PHYS_SUITE'),
+                          ('task_get_extrn_ics', 'EXTRN_MDL_NAME_ICS'),
+                          ('task_get_extrn_lbcs', 'EXTRN_MDL_NAME_LBCS'),
+                          ('workflow', 'DATE_FIRST_CYCL'),
+                          ('workflow', 'DATE_LAST_CYCL'),
+                          ('workflow', 'INCR_CYCL_FREQ'),
+                          ('workflow', 'FCST_LEN_HRS'),
+                          ('task_run_fcst', 'DT_ATMOS'),
+                          ('task_get_extrn_lbcs', 'LBC_SPEC_INTVL_HRS'),
+                          ('global', 'NUM_ENS_MEMBERS') ]
+
+            for key1, key2 in key_pairs:
+                f.write(f"{d}{testdict[expt].get(key1, {}).get(key2, '')}")
+            f.write("\n")
+
+
+def compare_rocotostat(expt_dict, name):
+    """Takes the dictionary for a given experiment, runs a `rocotostat` command to get the full
+    set of tasks for that experiment, and compares the two to see if any unsubmitted tasks
+    remain.
+
+    Args:
+        expt_dict (dict): A dictionary containing the information for an individual experiment.
+        name       (str): Name of the experiment; used for logging only.
+
+    Returns:
+        dict: The updated experiment dictionary.
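+
+    Example (a minimal sketch; assumes the hypothetical experiment directory contains valid
+    FV3LAM_wflow.xml and FV3LAM_wflow.db files, and that rocotostat is available on the path):
+
+        expt_dict = {"expt_dir": "/path/to/expt_dirs/my_test", "status": "SUCCEEDED"}
+        expt_dict = compare_rocotostat(expt_dict, "my_test")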
+    """
+
+    # Call rocotostat and store its output
+    rocoto_db = f"{expt_dict['expt_dir']}/FV3LAM_wflow.db"
+    rocoto_xml = f"{expt_dict['expt_dir']}/FV3LAM_wflow.xml"
+    rocotostat_cmd = ["rocotostat", "-w", rocoto_xml, "-d", rocoto_db, "-v", "10"]
+    p = subprocess.run(rocotostat_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+    rsout = p.stdout
+
+    # Parse each line of rocotostat output, extracting relevant information
+    untracked_tasks = []
+    for line in rsout.split('\n'):
+        # Skip blank lines and dividing lines of '=====...'
+        if not line:
+            continue
+        if line[0] == '=':
+            continue
+        line_array = line.split()
+        # Skip header lines
+        if line_array[0] == 'CYCLE':
+            continue
+        # We should now just have lines describing jobs, in the form:
+        # line_array = ['cycle','task','jobid','status','exit status','num tries','walltime']
+
+        # As defined in update_expt_status(), the "task names" in the dictionary are a
+        # combination of the task name and cycle
+        taskname = f'{line_array[1]}_{line_array[0]}'
+
+        # If we're already tracking this task, continue
+        if expt_dict.get(taskname):
+            continue
+
+        # Otherwise, add it to the list of untracked tasks
+        untracked_tasks.append(taskname)
+
+    if untracked_tasks:
+        # Give this a couple of loops before reporting that the experiment is "stuck"
+        if expt_dict['status'] == 'SUCCEEDED':
+            expt_dict['status'] = 'STALLED'
+        elif expt_dict['status'] == 'STALLED':
+            expt_dict['status'] = 'STUCK'
+        elif expt_dict['status'] == 'STUCK':
+            msg = f"WARNING: For experiment {name}, there are jobs that are not being submitted:"
+            for ut in untracked_tasks:
+                msg += f"\n    {ut}"
+            msg += """
+            It could be that your jobs are being throttled at the system level, or
+            some task dependencies have not been met.
+
+            If you continue to see this message, there may be an error with your
+            experiment configuration.
+
+            You can use ctrl-c to pause this script and inspect log files.
+            """
+            logging.warning(dedent(msg))
+        else:
+            logging.fatal(f"Experiment {name} has an invalid status")
+            raise ValueError(dedent(
+                f"""Invalid status for experiment {name}:
+                status is {expt_dict["status"]}
+                untracked task names are {untracked_tasks}"""))
+    else:
+        expt_dict["status"] = "COMPLETE"
+
+    return expt_dict
diff --git a/ush/calculate_cost.py b/ush/calculate_cost.py
index 1abe729545..731cce76f7 100755
--- a/ush/calculate_cost.py
+++ b/ush/calculate_cost.py
@@ -6,7 +6,6 @@
 from python_utils import (
     set_env_var,
-    import_vars,
     load_config_file,
     flatten_dict,
 )
@@ -17,88 +16,68 @@
 def calculate_cost(config_fn):
-    global PREDEF_GRID_NAME, QUILTING, GRID_GEN_METHOD
-
-    # import all environment variables
-    IMPORTS = [
-        "PREDEF_GRID_NAME",
-        "QUILTING",
-        "GRID_GEN_METHOD",
-        "DT_ATMOS",
-        "LAYOUT_X",
-        "LAYOUT_Y",
-        "BLOCKSIZE",
-    ]
-    import_vars(env_vars=IMPORTS)
-
     ushdir = os.path.dirname(os.path.abspath(__file__))
 
     # get grid config parameters (predefined or custom)
-    if PREDEF_GRID_NAME:
-        QUILTING = False
+    cfg_u = load_config_file(config_fn)
+    cfg_u = flatten_dict(cfg_u)
+
+    if 'PREDEF_GRID_NAME' in cfg_u:
         params_dict = set_predef_grid_params(
             USHdir=ushdir,
-            grid_name=PREDEF_GRID_NAME,
-            quilting=QUILTING,
+            grid_name=cfg_u['PREDEF_GRID_NAME'],
+            quilting=True
         )
-        for param, value in params_dict.items():
-            if param in IMPORTS and globals()[param] is not None:
-                params_dict[param] = globals()[param]
-        import_vars(dictionary=params_dict)
+
+        # Merge cfg_u with the defaults; duplicate keys in cfg_u overwrite the defaults
+        cfg = {**params_dict, **cfg_u}
     else:
-        cfg_u = load_config_file(config_fn)
-        cfg_u = flatten_dict(cfg_u)
-        import_vars(dictionary=cfg_u)
+        cfg = cfg_u
 
     # number of gridpoints (nx*ny) depends on grid generation method
-    if GRID_GEN_METHOD == "GFDLgrid":
+    if cfg['GRID_GEN_METHOD'] == "GFDLgrid":
         grid_params = set_gridparams_GFDLgrid(
-            lon_of_t6_ctr=GFDLgrid_LON_T6_CTR,
-            lat_of_t6_ctr=GFDLgrid_LAT_T6_CTR,
-            res_of_t6g=GFDLgrid_NUM_CELLS,
-            stretch_factor=GFDLgrid_STRETCH_FAC,
-            refine_ratio_t6g_to_t7g=GFDLgrid_REFINE_RATIO,
-            istart_of_t7_on_t6g=GFDLgrid_ISTART_OF_RGNL_DOM_ON_T6G,
-            iend_of_t7_on_t6g=GFDLgrid_IEND_OF_RGNL_DOM_ON_T6G,
-            jstart_of_t7_on_t6g=GFDLgrid_JSTART_OF_RGNL_DOM_ON_T6G,
-            jend_of_t7_on_t6g=GFDLgrid_JEND_OF_RGNL_DOM_ON_T6G,
+            lon_of_t6_ctr=cfg['GFDLgrid_LON_T6_CTR'],
+            lat_of_t6_ctr=cfg['GFDLgrid_LAT_T6_CTR'],
+            res_of_t6g=cfg['GFDLgrid_NUM_CELLS'],
+            stretch_factor=cfg['GFDLgrid_STRETCH_FAC'],
+            refine_ratio_t6g_to_t7g=cfg['GFDLgrid_REFINE_RATIO'],
+            istart_of_t7_on_t6g=cfg['GFDLgrid_ISTART_OF_RGNL_DOM_ON_T6G'],
+            iend_of_t7_on_t6g=cfg['GFDLgrid_IEND_OF_RGNL_DOM_ON_T6G'],
+            jstart_of_t7_on_t6g=cfg['GFDLgrid_JSTART_OF_RGNL_DOM_ON_T6G'],
+            jend_of_t7_on_t6g=cfg['GFDLgrid_JEND_OF_RGNL_DOM_ON_T6G'],
             run_envir="community",
             verbose=False,
             nh4=4,
         )
 
-    elif GRID_GEN_METHOD == "ESGgrid":
+    elif cfg['GRID_GEN_METHOD'] == "ESGgrid":
         constants = load_config_file(os.path.join(ushdir, "constants.yaml"))
         grid_params = set_gridparams_ESGgrid(
-            lon_ctr=ESGgrid_LON_CTR,
-            lat_ctr=ESGgrid_LAT_CTR,
-            nx=ESGgrid_NX,
-            ny=ESGgrid_NY,
-            pazi=ESGgrid_PAZI,
-            halo_width=ESGgrid_WIDE_HALO_WIDTH,
-            delx=ESGgrid_DELX,
-            dely=ESGgrid_DELY,
+            lon_ctr=cfg['ESGgrid_LON_CTR'],
+            lat_ctr=cfg['ESGgrid_LAT_CTR'],
+            nx=cfg['ESGgrid_NX'],
+            ny=cfg['ESGgrid_NY'],
+            pazi=cfg['ESGgrid_PAZI'],
+            halo_width=cfg['ESGgrid_WIDE_HALO_WIDTH'],
+            delx=cfg['ESGgrid_DELX'],
+            dely=cfg['ESGgrid_DELY'],
             constants=constants["constants"],
         )
 
+    else:
+        raise ValueError(f"GRID_GEN_METHOD is set to an invalid value: {cfg['GRID_GEN_METHOD']}")
 
-    NX = grid_params["NX"]
-    NY = grid_params["NY"]
-    cost = [DT_ATMOS, NX * NY]
+    cost = [cfg['DT_ATMOS'], grid_params["NX"] * grid_params["NY"]]
 
     # reference grid (6-hour forecast on RRFS_CONUS_25km)
     PREDEF_GRID_NAME = "RRFS_CONUS_25km"
-    params_dict = set_predef_grid_params(
-        USHdir=os.path.dirname(os.path.abspath(__file__)),
+    refgrid = set_predef_grid_params(
+        USHdir=ushdir,
         grid_name=PREDEF_GRID_NAME,
-        quilting=QUILTING,
+        quilting=True,
    )
-    for param, value in params_dict.items():
-        if param in IMPORTS and globals()[param] is not None:
-            params_dict[param] = globals()[param]
-    import_vars(dictionary=params_dict)
-    cost.extend([DT_ATMOS, ESGgrid_NX * ESGgrid_NY])
+    cost.extend([refgrid['DT_ATMOS'], refgrid['ESGgrid_NX'] * refgrid['ESGgrid_NY']])
 
     return cost
@@ -124,16 +103,6 @@ def calculate_cost(config_fn):
 class Testing(unittest.TestCase):
     def test_calculate_cost(self):
         USHdir = os.path.dirname(os.path.abspath(__file__))
-        params = calculate_cost(None)
-        self.assertCountEqual(params, [36, 1987440, 36, 28689])
-
-    def setUp(self):
-        set_env_var("DEBUG", False)
-        set_env_var("VERBOSE", False)
-        set_env_var("PREDEF_GRID_NAME", "RRFS_CONUS_3km")
-        set_env_var("DT_ATMOS", 36)
-        set_env_var("LAYOUT_X", 18)
-        set_env_var("LAYOUT_Y", 36)
-        set_env_var("BLOCKSIZE", 28)
-        set_env_var("QUILTING", False)
-        set_env_var("RUN_ENVIR", "community")
+        params = calculate_cost(os.path.join(USHdir, 'config.community.yaml'))
+        self.assertCountEqual(params, [180, 28689, 180, 28689])
+
diff --git a/ush/python_utils/config_parser.py b/ush/python_utils/config_parser.py
index 6510af62eb..a66be884f2 100644
--- a/ush/python_utils/config_parser.py
+++ b/ush/python_utils/config_parser.py
@@ -15,6 +15,7 @@
 """
 import argparse
+import datetime
 #
 # Note: Yaml maynot be available in which case we suppress
@@ -97,6 +98,14 @@ def path_join(arg):
     return os.path.join(*arg)
 
 
+def days_ago(arg):
+    """A Jinja2 filter that returns a date string (format YYYYMMDD00) for the
+    date `arg` days before today.
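+
+    Example (a minimal sketch; the rendered value depends on today's date):
+
+        In a Jinja2-templated config value, '{{ 3|days_ago }}' renders to
+        00z three days before today, e.g. '2023031700'.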
+    """
+
+    return (datetime.date.today() -
+            datetime.timedelta(days=arg)).strftime("%Y%m%d00")
+
+
 def extend_yaml(yaml_dict, full_dict=None):
     """
@@ -140,6 +149,7 @@
         loader=jinja2.BaseLoader, undefined=jinja2.StrictUndefined
     )
     j2env.filters["path_join"] = path_join
+    j2env.filters["days_ago"] = days_ago
     j2tmpl = j2env.from_string(template)
     try:
         # Fill in a template that has the appropriate variables