ufs-community · gsketefian · May 2, 2022 · Mar 31, 2022 · Mar 31, 2022 · Apr 14, 2022
@@ -12,8 +12,8 @@
 # directory represent active experiments (see below for how this is done).
 # For all such experiments, it calls the workflow (re)launch script to 
 # update the status of the workflow and prints the status out to screen.
-# It also generates a summary status file in the base directory that 
-# contains the last num_tail_lines lines (defined below) of each experiment's 
+# It also generates a status report file in the base directory that 
+# contains the last num_log_lines lines (defined below) of each experiment's 
 # workflow log file [which is generated by the (re)launch script] and thus 
 # has information on which tasks may have succeeded/failed].
 #
@@ -70,42 +70,112 @@ ushdir="$homerrfs/ush"
 #
 #-----------------------------------------------------------------------
 #
-# Exactly one argument must be specified that consists of the full path
-# to the experiments base directory (i.e. the directory containing the 
-# experiment subdirectories).  Ensure that the number of arguments is 
-# one.
+# Set the usage message.
 #
 #-----------------------------------------------------------------------
 #
-num_args="$#"
-if [ "${num_args}" -eq 1 ]; then
-  expts_basedir="$1"
-else
-  print_err_msg_exit "
-The number of arguments to this script must be exacty one, and that 
-argument must specify the experiments base directory, i.e. the directory
-containing the experiment subdirectories.  The acutal number of arguments 
-is:
-  num_args = ${num_args}"  
+usage_str="\
+Usage:
+
+  ${scrfunc_fn} \\
+    expts_basedir=\"...\" \\
+    [num_log_lines=\"...\"] \\
+    [verbose=\"...\"]
+
+Arguments in brackets are optional.  The arguments are defined as follows:
+
+expts_basedir:
+Full path to the experiments base directory, i.e. the directory containing 
+the experiment subdirectories.
+
+num_log_lines:
+Optional integer specifying the number of lines from the end of the 
+workflow launch log file (log.launch_FV3LAM_wflow) of each test to 
+include in the status report file that this script generates.  Default is 40.
+
+verbose:
+Optional verbosity flag.  Should be set to \"TRUE\" or \"FALSE\".  Default
+is \"FALSE\".
+"
+#
+#-----------------------------------------------------------------------
+#
+# Check to see if usage help for this script is being requested.  If so,
+# print it out and exit with a 0 exit code (success).
+#
+#-----------------------------------------------------------------------
+#
+help_flag="--help"
+if [ "$#" -eq 1 ] && [ "$1" = "${help_flag}" ]; then
+  print_info_msg "${usage_str}"
+  exit 0
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Specify the set of valid argument names for this script or function.
+# Then process the arguments provided to it on the command line (which
+# should consist of a set of name-value pairs of the form arg1="value1",
+# arg2="value2", etc).
+#
+#-----------------------------------------------------------------------
+#
+valid_args=( \
+  "expts_basedir" \
+  "num_log_lines" \
+  "verbose" \
+  )
+process_args valid_args "$@"
+#
+#-----------------------------------------------------------------------
+#
+# Set the default value of "num_log_lines".
+#
+#-----------------------------------------------------------------------
+#
+num_log_lines=${num_log_lines:-"40"}
+#
+#-----------------------------------------------------------------------
+#
+# Make the default value of "verbose" "FALSE".  Then make sure "verbose"
+# is set to a valid value.
+#
+#-----------------------------------------------------------------------
+#
+verbose=${verbose:-"FALSE"}
+check_var_valid_value "verbose" "valid_vals_BOOLEAN"
+verbose=$(boolify "$verbose")
+#
+#-----------------------------------------------------------------------
+#
+# Verify that the required arguments to this script have been specified.
+# If not, print out an error message and exit.
+#
+#-----------------------------------------------------------------------
+#
+help_msg="\
+Use
+  ${scrfunc_fn} ${help_flag}
+to get help on how to use this script."
+
+if [ -z "${expts_basedir}" ]; then
+  print_err_msg_exit "\
+The argument \"expts_basedir\" specifying the base directory containing
+the experiment directories was not specified in the call to this script.  \
+${help_msg}"
 fi
 #
 #-----------------------------------------------------------------------
 #
 # Check that the specified experiments base directory exists and is 
 # actually a directory.  If not, print out an error message and exit.
-# If so, print out an informational message.
 #
 #-----------------------------------------------------------------------
 #
 if [ ! -d "${expts_basedir}" ]; then
   print_err_msg_exit "
-The experiments base directory (expts_basedir) does not exit or is not 
-actually a directory:
-  expts_basedir = \"${expts_basedir}\""
-else
-  print_info_msg "
-Checking the workflow status of all forecast experiments in the following
-specified experiments base directory: 
+The specified experiments base directory (expts_basedir) does not exist
+or is not actually a directory:
   expts_basedir = \"${expts_basedir}\""
 fi
 #
@@ -116,7 +186,7 @@ fi
 #
 #-----------------------------------------------------------------------
 #
-cd "${expts_basedir}"
+cd_vrfy "${expts_basedir}"
 #
 # Get a list of all subdirectories (but not files) in the experiment base 
 # directory.  Note that the ls command below will return a string containing
@@ -175,6 +245,12 @@ var_defns_fn="var_defns.sh"
 j="0"
 expt_subdirs=()
 
+print_info_msg "\
+Checking for active experiment directories in the specified experiments
+base directory (expts_basedir):
+  expts_basedir = \"${expts_basedir}\"
+..."
+
 num_subdirs="${#subdirs_list[@]}"
 for (( i=0; i<=$((num_subdirs-1)); i++ )); do
 
@@ -184,7 +260,7 @@ $separator
 Checking whether the subdirectory 
   \"${subdir}\"
 contains an active experiment..."
-  print_info_msg "$msg"
+  print_info_msg "$verbose" "$msg"
 
   cd_vrfy "${subdir}"
 #
@@ -193,7 +269,7 @@ contains an active experiment..."
 #
   if [ ! -f "${var_defns_fn}" ]; then
 
-    print_info_msg "
+    print_info_msg "$verbose" "
 The current subdirectory (subdir) under the experiments base directory
 (expts_basedir) does not contain an experiment variable defintions file
 (var_defns_fn):
@@ -219,7 +295,7 @@ must be checked."
 #
     if [ "${EXPT_SUBDIR}" = "$subdir" ]; then
 
-      print_info_msg "
+      print_info_msg "$verbose" "
 The current subdirectory (subdir) under the experiments base directory
 (expts_basedir) contains an active experiment:
   expts_basedir = \"${expts_basedir}\"
@@ -238,7 +314,7 @@ subdirectories whose workflow status must be checked."
 #
     else
 
-      print_info_msg "
+      print_info_msg "$verbose" "
 The current subdirectory (subdir) under the experiments base directory
 (expts_basedir) contains an experiment whose original name (EXPT_SUBDIR)
 does not match the name of the current subdirectory:
@@ -254,7 +330,7 @@ status must be checked."
 
   fi
 
-  print_info_msg "\
+  print_info_msg "$verbose" "\
 $separator
 "
 #
@@ -302,15 +378,14 @@ check_for_preexist_dir_file "${expts_status_fp}" "rename"
 # Loop through the elements of the array expt_subdirs.  For each element
 # (i.e. for each active experiment), change location to the experiment 
 # directory and call the script launch_FV3LAM_wflow.sh to update the log 
-# file log.launch_FV3LAM_wflow.  Then take the last num_tail_lines of 
+# file log.launch_FV3LAM_wflow.  Then take the last num_log_lines of 
 # this log file (along with an appropriate message) and add it to the 
 # status report file.
 #
 #-----------------------------------------------------------------------
 #
 launch_wflow_fn="launch_FV3LAM_wflow.sh"
 launch_wflow_log_fn="log.launch_FV3LAM_wflow"
-num_tail_lines="40"
 
 for (( i=0; i<=$((num_expts-1)); i++ )); do
 
@@ -326,25 +401,28 @@ Checking workflow status of experiment \"${expt_subdir}\" ..."
 #
   cd_vrfy "${expt_subdir}"
   launch_msg=$( "${launch_wflow_fn}" 2>&1 )
-  log_tail=$( tail -n ${num_tail_lines} "${launch_wflow_log_fn}" )
+  log_tail=$( tail -n ${num_log_lines} "${launch_wflow_log_fn}" )
 #
 # Print the workflow status to the screen.
 #
-  wflow_status=$( printf "${log_tail}" | grep "Workflow status:" )
-#  wflow_status="${wflow_status## }"  # Not sure why this doesn't work to strip leading spaces.
-  wflow_status=$( printf "${wflow_status}" "%s" | sed -r 's|^[ ]*||g' )  # Remove leading spaces.
+  # Keep only the last "Workflow status:" line of the log excerpt.
+  wflow_status=$( printf "%s\n" "${log_tail}" | grep "Workflow status:" | tail -n 1 )
+  # This expansion strips at most one leading space (pattern is a single space).
+#  wflow_status="${wflow_status## }"
+  # Remove all leading spaces; the text is passed as data, never as the format.
+  wflow_status=$( printf "%s" "${wflow_status}" | sed -r 's|^[ ]*||g' )
   print_info_msg "${wflow_status}"
   print_info_msg "\
 $separator
 "
 #
-# Combine message above with the last num_tail_lines lines from the workflow 
+# Combine message above with the last num_log_lines lines from the workflow 
 # launch log file and place the result in the status report file.
 #
   msg=$msg"
 ${wflow_status}
 
-The last ${num_tail_lines} lines of this experiment's workflow launch log file 
+The last ${num_log_lines} lines of this experiment's workflow launch log file 
 (\"${launch_wflow_log_fn}\") are:
 
 ${log_tail}
@@ -360,4 +438,7 @@ ${log_tail}
 done
 
 print_info_msg "\
+A status report has been created in:
+  expts_status_fp = \"${expts_status_fp}\"
+
 DONE."
@@ -133,9 +133,9 @@ tests under subdirectory testset1, another set of tests under testset2,
 etc.
 
 exec_subdir:
-Optional. Argument is used to set the EXEC_SUBDIR configuration
-variable. Please see the ush/default_configs.sh file for a full
-description.
+Optional.  Argument used to set the EXEC_SUBDIR experiment variable. 
+Please see the default experiment configuration file \"config_defaults.sh\" 
+for a full description of EXEC_SUBDIR.
 
 use_cron_to_relaunch:
 Argument used to explicitly set the experiment variable USE_CRON_TO_RELAUNCH
@@ -208,14 +208,85 @@ Same as the argument \"stmp\" described above but for setting the
 experiment variable PTMP for all tests that will run in NCO mode.
 
 compiler:
-Type of compiler to use for the workflow. Options are \"intel\" 
-and \"gnu\". Default is \"intel\",
+Type of compiler to use for the workflow. Options are \"intel\" and \"gnu\". 
+Default is \"intel\".
 
 build_mod_fn:
 Specify the build module files (see ufs-srweather-app/modulefiles) to 
 use for the workflow. (e.g. build_cheyenne_gnu). If a 
 \"gnu\" compiler is specified, it must also be specified with 
 the \"compiler\" option.
+
+
+Usage Examples:
+--------------
+Here, we give several common usage examples.  In the following, assume 
+my_tests.txt is a text file in the same directory as this script containing 
+a list of test names that we want to run, e.g.
+
+> more my_tests.txt
+new_ESGgrid
+specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE
+
+Then:
+
+1) To run the tests listed in my_tests.txt on Hera and charge the core-
+   hours used to the \"rtrr\" account, use:
+
+     > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\"
+
+   This will create the experiment subdirectories for the two tests in
+   the directory
+
+     \${SR_WX_APP_TOP_DIR}/../expt_dirs
+
+   where SR_WX_APP_TOP_DIR is the directory in which the ufs-srweather-app 
+   repository is cloned.  Thus, the following two experiment directories
+   will be created:
+
+     \${SR_WX_APP_TOP_DIR}/../expt_dirs/new_ESGgrid
+     \${SR_WX_APP_TOP_DIR}/../expt_dirs/specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE
+
+   In addition, by default, cron jobs will be created in the user's cron
+   table to relaunch the workflows of these experiments every 2 minutes.
+
+2) To change the frequency with which the cron relaunch jobs are submitted
+   from the default of 2 minutes to 1 minute, use:
+
+     > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" cron_relaunch_intvl_mnts=\"01\"
+
+3) To disable use of cron (which means the workflow for each test will 
+   have to be relaunched manually from within each experiment directory),
+   use:
+
+     > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" use_cron_to_relaunch=\"FALSE\"
+
+4) To place the experiment subdirectories in a subdirectory named \"test_set_01\"
+   under 
+
+     \${SR_WX_APP_TOP_DIR}/../expt_dirs
+
+   (instead of immediately under the latter), use:
+
+     > run_WE2E_tests.sh tests_file=\"my_tests.txt\" machine=\"hera\" account=\"rtrr\" expt_basedir=\"test_set_01\"
+
+   In this case, the full paths to the experiment directories will be:
+
+     \${SR_WX_APP_TOP_DIR}/../expt_dirs/test_set_01/new_ESGgrid
+     \${SR_WX_APP_TOP_DIR}/../expt_dirs/test_set_01/specify_DT_ATMOS_LAYOUT_XY_BLOCKSIZE
+
+5) To use a list of tests that is located in
+
+     /path/to/custom/my_tests.txt
+
+   instead of in the same directory as this script, and to have the 
+   experiment directories be placed in an arbitrary location, say 
+
+     /path/to/custom/expt_dirs
+
+   use:
+
+     > run_WE2E_tests.sh tests_file=\"/path/to/custom/my_tests.txt\" machine=\"hera\" account=\"rtrr\" expt_basedir=\"/path/to/custom/expt_dirs\"
 "
 #
 #-----------------------------------------------------------------------