diff --git a/CIME/SystemTests/pgn.py b/CIME/SystemTests/pgn.py index 83199ec674e..07ac1f4cb08 100644 --- a/CIME/SystemTests/pgn.py +++ b/CIME/SystemTests/pgn.py @@ -16,7 +16,7 @@ import logging from collections import OrderedDict -from shutils import copytree +from shutil import copytree import pandas as pd import numpy as np diff --git a/CIME/SystemTests/system_tests_common.py b/CIME/SystemTests/system_tests_common.py index 3c1f3e99830..42e1b897e58 100644 --- a/CIME/SystemTests/system_tests_common.py +++ b/CIME/SystemTests/system_tests_common.py @@ -38,7 +38,7 @@ ) import CIME.build as build -import glob, gzip, time, traceback, os +import glob, gzip, time, traceback, os, math from contextlib import ExitStack logger = logging.getLogger(__name__) @@ -174,7 +174,7 @@ def _set_restart_interval(self): expect(False, f"stop_option {stop_option} not available for this test") stop_n = int(stop_n * factor // coupling_secs) - rest_n = int((stop_n // 2 + 1) * coupling_secs / factor) + rest_n = math.ceil((stop_n // 2 + 1) * coupling_secs / factor) expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n)) diff --git a/CIME/Tools/xmlchange b/CIME/Tools/xmlchange index 46debd906a2..4101f9b5dba 100755 --- a/CIME/Tools/xmlchange +++ b/CIME/Tools/xmlchange @@ -55,7 +55,6 @@ from standard_script_setup import * from CIME.utils import ( expect, convert_to_type, - get_batch_script_for_job, Timeout, ) from CIME.status import append_case_status diff --git a/CIME/XML/env_batch.py b/CIME/XML/env_batch.py index b444a29333a..580a8e9d434 100644 --- a/CIME/XML/env_batch.py +++ b/CIME/XML/env_batch.py @@ -39,6 +39,7 @@ def __init__(self, case_root=None, infile="env_batch.xml", read_only=False): case_root, infile, schema=schema, read_only=read_only ) self._batchtype = self.get_batch_system_type() + self._env_workflow = None # pylint: disable=arguments-differ def set_value(self, item, value, subgroup=None, ignore_type=False): @@ -204,14 +205,16 @@ def set_batch_system(self, batchobj, batch_system_type=None): lock_file(os.path.basename(batchobj.filename), self._caseroot) def get_job_overrides(self, job, case): - env_workflow = case.get_env("workflow") + if not self._env_workflow: + self._env_workflow = case.get_env("workflow") ( total_tasks, num_nodes, tasks_per_node, thread_count, ngpus_per_node, - ) = env_workflow.get_job_specs(case, job) + ) = self._env_workflow.get_job_specs(case, job) + overrides = {} if total_tasks: @@ -257,7 +260,16 @@ def make_batch_script(self, input_template, job, case, outfile=None): subgroup=job, overrides=overrides, ) - output_name = get_batch_script_for_job(job) if outfile is None else outfile + if not self._env_workflow: + self._env_workflow = case.get_env("workflow") + + output_name = ( + get_batch_script_for_job( + job, hidden=self._env_workflow.hidden_job(case, job) + ) + if outfile is None + else outfile + ) logger.info("Creating file {}".format(output_name)) with open(output_name, "w") as fd: fd.write(output_text) @@ -274,8 +286,10 @@ def set_job_defaults(self, batch_jobs, case): if self._batchtype == "none": return - env_workflow = case.get_env("workflow") - known_jobs = env_workflow.get_jobs() + + if not self._env_workflow: + self._env_workflow = case.get_env("workflow") + known_jobs = self._env_workflow.get_jobs() for job, jsect in batch_jobs: if job not in known_jobs: @@ -432,11 +446,13 @@ def set_job_defaults(self, batch_jobs, case): seconds = convert_to_seconds(walltime) full_bab_time = convert_to_babylonian_time(seconds) walltime = format_time(walltime_format, "%H:%M:%S", full_bab_time) + if not self._env_workflow: + self._env_workflow = case.get_env("workflow") - env_workflow.set_value( + self._env_workflow.set_value( "JOB_QUEUE", self.text(queue), subgroup=job, ignore_type=False ) - env_workflow.set_value("JOB_WALLCLOCK_TIME", walltime, subgroup=job) + self._env_workflow.set_value("JOB_WALLCLOCK_TIME", walltime, subgroup=job) logger.debug( "Job {} queue {} walltime {}".format(job, self.text(queue), walltime) ) @@ -739,13 +755,22 @@ def submit_jobs( waiting to resubmit at the end of the first sequence workflow is a logical indicating whether only "job" is submitted or the workflow sequence starting with "job" is submitted """ - env_workflow = case.get_env("workflow") + external_workflow = case.get_value("EXTERNAL_WORKFLOW") - alljobs = env_workflow.get_jobs() + if not self._env_workflow: + self._env_workflow = case.get_env("workflow") + alljobs = self._env_workflow.get_jobs() alljobs = [ j for j in alljobs - if os.path.isfile(os.path.join(self._caseroot, get_batch_script_for_job(j))) + if os.path.isfile( + os.path.join( + self._caseroot, + get_batch_script_for_job( + j, hidden=self._env_workflow.hidden_job(case, j) + ), + ) + ) ] startindex = 0 @@ -761,7 +786,9 @@ def submit_jobs( if index < startindex: continue try: - prereq = env_workflow.get_value("prereq", subgroup=job, resolved=False) + prereq = self._env_workflow.get_value( + "prereq", subgroup=job, resolved=False + ) if ( external_workflow or prereq is None @@ -780,7 +807,9 @@ def submit_jobs( ), ) if prereq: - jobs.append((job, env_workflow.get_value("dependency", subgroup=job))) + jobs.append( + (job, self._env_workflow.get_value("dependency", subgroup=job)) + ) if self._batchtype == "cobalt": break @@ -1065,13 +1094,17 @@ def _submit_single_job( set_continue_run=resubmit_immediate, submit_resubmits=workflow and not resubmit_immediate, ) + if batch_system == "lsf" and not batch_env_flag: sequence = ( run_args, batchsubmit, submitargs, batchredirect, - get_batch_script_for_job(job), + get_batch_script_for_job( + job, + hidden=self._env_workflow.hidden_job(case, job), + ), ) elif batch_env_flag: sequence = ( @@ -1079,14 +1112,26 @@ def _submit_single_job( submitargs, run_args, batchredirect, - os.path.join(self._caseroot, get_batch_script_for_job(job)), + os.path.join( + self._caseroot, + get_batch_script_for_job( + job, + hidden=self._env_workflow.hidden_job(case, job), + ), + ), ) else: sequence = ( batchsubmit, submitargs, batchredirect, - os.path.join(self._caseroot, get_batch_script_for_job(job)), + os.path.join( + self._caseroot, + get_batch_script_for_job( + job, + hidden=self._env_workflow.hidden_job(case, job), + ), + ), run_args, ) @@ -1377,12 +1422,13 @@ def compare_xml(self, other): def make_all_batch_files(self, case): machdir = case.get_value("MACHDIR") - env_workflow = case.get_env("workflow") logger.info("Creating batch scripts") - jobs = env_workflow.get_jobs() + if not self._env_workflow: + self._env_workflow = case.get_env("workflow") + jobs = self._env_workflow.get_jobs() for job in jobs: template = case.get_resolved_value( - env_workflow.get_value("template", subgroup=job) + self._env_workflow.get_value("template", subgroup=job) ) if os.path.isabs(template): input_batch_script = template diff --git a/CIME/XML/env_workflow.py b/CIME/XML/env_workflow.py index c59ff23aba4..8eaa7171ce8 100644 --- a/CIME/XML/env_workflow.py +++ b/CIME/XML/env_workflow.py @@ -5,6 +5,7 @@ from CIME.XML.standard_module_setup import * from CIME.XML.env_base import EnvBase from CIME.utils import get_cime_root + import re, math logger = logging.getLogger(__name__) @@ -21,6 +22,7 @@ def __init__(self, case_root=None, infile="env_workflow.xml", read_only=False): # schema = os.path.join(get_cime_root(), "CIME", "config", "xml_schemas", "env_workflow.xsd") # TODO: define schema for this file schema = None + self._hidden = {} super(EnvWorkflow, self).__init__( case_root, infile, schema=schema, read_only=read_only ) @@ -89,7 +91,17 @@ def get_type_info(self, vid): ) return type_info + def hidden_job(self, case, job): + if job not in self._hidden: + self.get_job_specs(case, job) + return self._hidden[job] + def get_job_specs(self, case, job): + hidden = self.get_value("hidden", subgroup=job) + self._hidden[job] = (hidden is None and job != "case.st_archive") or ( + hidden is not None and hidden.lower() == "true" + ) + task_count = case.get_resolved_value(self.get_value("task_count", subgroup=job)) tasks_per_node = case.get_resolved_value( self.get_value("tasks_per_node", subgroup=job) diff --git a/CIME/case/case.py b/CIME/case/case.py index 2f2d44aaca3..6f9082b0aa4 100644 --- a/CIME/case/case.py +++ b/CIME/case/case.py @@ -1234,7 +1234,7 @@ def _setup_mach_pes(self, pecount, multi_driver, ninst, machine_name, mpilib): and value.endswith("'") ): value = value[1:-1] - if append[key]: + if key in append and append[key]: ovalue = self.get_value(key) self.set_value(key, value + " " + ovalue) diff --git a/CIME/data/config/xml_schemas/config_workflow.xsd b/CIME/data/config/xml_schemas/config_workflow.xsd index 14a82586c08..5b09913a4b6 100644 --- a/CIME/data/config/xml_schemas/config_workflow.xsd +++ b/CIME/data/config/xml_schemas/config_workflow.xsd @@ -13,6 +13,7 @@ + @@ -57,6 +58,7 @@ + diff --git a/CIME/scripts/create_test.py b/CIME/scripts/create_test.py index 07a10689b3c..74dd60e8c19 100755 --- a/CIME/scripts/create_test.py +++ b/CIME/scripts/create_test.py @@ -260,6 +260,11 @@ def parse_command_line(args, description): "\nNOTE: this can also be done after the fact with bless_test_results", ) + parser.add_argument( + "--driver", + help="Override driver specified in tests and use this one.", + ) + default = get_default_setting(config, "COMPILER", None, check_main=True) parser.add_argument( @@ -775,6 +780,7 @@ def parse_command_line(args, description): args.workflow, args.chksum, args.force_rebuild, + args.driver, ) @@ -936,6 +942,7 @@ def create_test( workflow, chksum, force_rebuild, + driver, ): ############################################################################### impl = TestScheduler( @@ -977,6 +984,7 @@ def create_test( workflow=workflow, chksum=chksum, force_rebuild=force_rebuild, + driver=driver, ) success = impl.run_tests( @@ -1081,6 +1089,7 @@ def _main_func(description=None): workflow, chksum, force_rebuild, + driver, ) = parse_command_line(sys.argv, description) success = False @@ -1134,6 +1143,7 @@ def _main_func(description=None): workflow, chksum, force_rebuild, + driver, ) run_count += 1 diff --git a/CIME/test_scheduler.py b/CIME/test_scheduler.py index 9caa8bb82bc..2e0340521e2 100644 --- a/CIME/test_scheduler.py +++ b/CIME/test_scheduler.py @@ -211,11 +211,12 @@ def __init__( workflow=None, chksum=False, force_rebuild=False, + driver=None, ): ########################################################################### self._cime_root = get_cime_root() self._cime_model = get_model() - self._cime_driver = get_cime_default_driver() + self._cime_driver = driver if driver is not None else get_cime_default_driver() self._save_timing = save_timing self._queue = queue self._test_data = ( @@ -651,6 +652,7 @@ def _create_newcase_phase(self, test): mpilib = None ninst = 1 ncpl = 1 + driver = self._cime_driver if case_opts is not None: for case_opt in case_opts: # pylint: disable=not-an-iterable if case_opt.startswith("M"): @@ -683,15 +685,16 @@ def _create_newcase_phase(self, test): ) ) elif case_opt.startswith("V"): - self._cime_driver = case_opt[1:] - create_newcase_cmd += " --driver {}".format(self._cime_driver) + driver = case_opt[1:] + + create_newcase_cmd += " --driver {}".format(driver) if ( "--ninst" in create_newcase_cmd and not "--multi-driver" in create_newcase_cmd ): if "--driver nuopc" in create_newcase_cmd or ( - "--driver" not in create_newcase_cmd and self._cime_driver == "nuopc" + "--driver" not in create_newcase_cmd and driver == "nuopc" ): expect(False, "_N option not supported by nuopc driver, use _C instead") @@ -769,9 +772,16 @@ def _xml_phase(self, test): test_dir = self._get_test_dir(test) envtest = EnvTest(test_dir) + # Find driver. It may be different for the current test if V testopt is used + driver = self._cime_driver + if case_opts is not None: + for case_opt in case_opts: # pylint: disable=not-an-iterable + if case_opt.startswith("V"): + driver = case_opt[1:] + # Determine list of component classes that this coupler/driver knows how # to deal with. This list follows the same order as compset longnames follow. - files = Files(comp_interface=self._cime_driver) + files = Files(comp_interface=driver) ufs_driver = os.environ.get("UFS_DRIVER") attribute = None if ufs_driver: @@ -779,13 +789,11 @@ def _xml_phase(self, test): drv_config_file = files.get_value("CONFIG_CPL_FILE", attribute=attribute) - if self._cime_driver == "nuopc" and not os.path.exists(drv_config_file): + if driver == "nuopc" and not os.path.exists(drv_config_file): drv_config_file = files.get_value("CONFIG_CPL_FILE", {"component": "cpl"}) expect( os.path.exists(drv_config_file), - "File {} not found, cime driver {}".format( - drv_config_file, self._cime_driver - ), + "File {} not found, cime driver {}".format(drv_config_file, driver), ) drv_comp = Component(drv_config_file, "CPL") @@ -910,7 +918,9 @@ def _xml_phase(self, test): elif opt.startswith("A"): # A option is for testing in ASYNC IO mode, only available with nuopc driver and pio2 envtest.set_test_parameter("PIO_ASYNC_INTERFACE", "TRUE") - envtest.set_test_parameter("CIME_DRIVER", "nuopc") + expect( + driver == "nuopc", "ASYNC IO mode only works with nuopc driver" + ) envtest.set_test_parameter("PIO_VERSION", "2") match = re.match("A([0-9]+)x?([0-9])*", opt) envtest.set_test_parameter("PIO_NUMTASKS_CPL", match.group(1)) @@ -996,10 +1006,14 @@ def _setup_phase(self, test): from_dir=test_dir, env=env, ) - expect( - cmdstat in [0, TESTS_FAILED_ERR_CODE], - "Fatal error in case.cmpgen_namelists: {}".format(output), - ) + try: + expect( + cmdstat in [0, TESTS_FAILED_ERR_CODE], + "Fatal error in case.cmpgen_namelists: {}".format(output), + ) + except Exception: + self._update_test_status_file(test, SETUP_PHASE, TEST_FAIL_STATUS) + raise if self._single_exe: with Case(self._get_test_dir(test), read_only=False) as case: diff --git a/CIME/test_status.py b/CIME/test_status.py index da818b2aca3..13e52497126 100644 --- a/CIME/test_status.py +++ b/CIME/test_status.py @@ -468,8 +468,8 @@ def _get_overall_status_based_on_phases( if rv in [NAMELIST_FAIL_STATUS, TEST_PASS_STATUS]: phase_responsible_for_status = phase # need to further inspect message to determine - # phase status - if "DIFF" in data[1]: + # phase status. BFAILs need to be a DIFF + if "DIFF" in data[1] or TEST_NO_BASELINES_COMMENT in data[1]: rv = TEST_DIFF_STATUS elif "ERROR" in data[1]: rv = TEST_FAIL_STATUS diff --git a/CIME/tests/test_sys_cime_case.py b/CIME/tests/test_sys_cime_case.py index 4b226ff3b46..d07456eb570 100644 --- a/CIME/tests/test_sys_cime_case.py +++ b/CIME/tests/test_sys_cime_case.py @@ -731,7 +731,8 @@ def test_self_build_cprnc(self): ) self.run_cmd_assert_result( - "./xmlchange CCSM_CPRNC=this_is_a_broken_cprnc", from_dir=casedir + "./xmlchange CCSM_CPRNC=this_is_a_broken_cprnc --file env_test.xml", + from_dir=casedir, ) self.run_cmd_assert_result("./case.build", from_dir=casedir) self.run_cmd_assert_result("./case.submit", from_dir=casedir) diff --git a/CIME/tests/test_sys_create_newcase.py b/CIME/tests/test_sys_create_newcase.py index b99ca4f10c4..1be636aff36 100644 --- a/CIME/tests/test_sys_create_newcase.py +++ b/CIME/tests/test_sys_create_newcase.py @@ -74,7 +74,7 @@ def test_a_createnewcase(self): # on systems (like github workflow) that do not have batch, set this for the next test if batch_system == "none": self.run_cmd_assert_result( - './xmlchange --subgroup case.run BATCH_COMMAND_FLAGS="-q \$JOB_QUEUE"', + r'./xmlchange --subgroup case.run BATCH_COMMAND_FLAGS="-q \$JOB_QUEUE"', from_dir=testdir, ) diff --git a/CIME/utils.py b/CIME/utils.py index f490a6f345f..2c04c6df769 100644 --- a/CIME/utils.py +++ b/CIME/utils.py @@ -820,12 +820,10 @@ def run_cmd( # or build a relative path and append `sys.path` to import # `standard_script_setup`. Providing `PYTHONPATH` fixes protential # broken paths in external python. - env.update( - { - "CIMEROOT": f"{get_cime_root()}", - "PYTHONPATH": f"{get_cime_root()}:{get_tools_path()}", - } - ) + env_pythonpath = os.environ.get("PYTHONPATH", "").split(":") + cime_pythonpath = [f"{get_cime_root()}", f"{get_tools_path()}"] + env_pythonpath + env["PYTHONPATH"] = ":".join(filter(None, cime_pythonpath)) + env["CIMEROOT"] = f"{get_cime_root()}" if timeout: with Timeout(timeout): @@ -2530,8 +2528,11 @@ def run_bld_cmd_ensure_logging(cmd, arg_logger, from_dir=None, timeout=None): expect(stat == 0, filter_unicode(errput)) -def get_batch_script_for_job(job): - return job if "st_archive" in job else "." + job +def get_batch_script_for_job(job, hidden=None): + # this if statement is for backward compatibility + if hidden is None: + hidden = job != "case.st_archive" + return "." + job if hidden else job def string_in_list(_string, _list):