Merge branch 'master' into update-mvk-config
jasonb5 committed Sep 21, 2024
2 parents d1d02fc + 2776043 commit 0892b4a
Showing 13 changed files with 134 additions and 49 deletions.
2 changes: 1 addition & 1 deletion CIME/SystemTests/pgn.py
@@ -16,7 +16,7 @@
import logging

from collections import OrderedDict
from shutils import copytree
from shutil import copytree

import pandas as pd
import numpy as np
4 changes: 2 additions & 2 deletions CIME/SystemTests/system_tests_common.py
@@ -38,7 +38,7 @@
)
import CIME.build as build

import glob, gzip, time, traceback, os
import glob, gzip, time, traceback, os, math
from contextlib import ExitStack

logger = logging.getLogger(__name__)
@@ -174,7 +174,7 @@ def _set_restart_interval(self):
expect(False, f"stop_option {stop_option} not available for this test")

stop_n = int(stop_n * factor // coupling_secs)
rest_n = int((stop_n // 2 + 1) * coupling_secs / factor)
rest_n = math.ceil((stop_n // 2 + 1) * coupling_secs / factor)

expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n))

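The system_tests_common.py hunk above switches the restart-interval computation from int() truncation to math.ceil(). A minimal sketch of why rounding up matters, using hypothetical values not taken from the diff (a 5-day run, an 1800 s coupling interval, STOP_OPTION in days so factor is 86400 s per unit): truncation can place the restart write before the midpoint of the run.

import math

# Hypothetical values for illustration only.
factor = 86400
coupling_secs = 1800
stop_n = int(5 * factor // coupling_secs)          # 240 coupling intervals

rest = (stop_n // 2 + 1) * coupling_secs / factor  # 2.52... days
print(int(rest))        # 2 -> old behavior: restart lands before the run midpoint
print(math.ceil(rest))  # 3 -> new behavior: rounds up to the next whole unit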
1 change: 0 additions & 1 deletion CIME/Tools/xmlchange
@@ -55,7 +55,6 @@ from standard_script_setup import *
from CIME.utils import (
expect,
convert_to_type,
get_batch_script_for_job,
Timeout,
)
from CIME.status import append_case_status
82 changes: 64 additions & 18 deletions CIME/XML/env_batch.py
@@ -39,6 +39,7 @@ def __init__(self, case_root=None, infile="env_batch.xml", read_only=False):
case_root, infile, schema=schema, read_only=read_only
)
self._batchtype = self.get_batch_system_type()
self._env_workflow = None

# pylint: disable=arguments-differ
def set_value(self, item, value, subgroup=None, ignore_type=False):
@@ -204,14 +205,16 @@ def set_batch_system(self, batchobj, batch_system_type=None):
lock_file(os.path.basename(batchobj.filename), self._caseroot)

def get_job_overrides(self, job, case):
env_workflow = case.get_env("workflow")
if not self._env_workflow:
self._env_workflow = case.get_env("workflow")
(
total_tasks,
num_nodes,
tasks_per_node,
thread_count,
ngpus_per_node,
) = env_workflow.get_job_specs(case, job)
) = self._env_workflow.get_job_specs(case, job)

overrides = {}

if total_tasks:
@@ -257,7 +260,16 @@ def make_batch_script(self, input_template, job, case, outfile=None):
subgroup=job,
overrides=overrides,
)
output_name = get_batch_script_for_job(job) if outfile is None else outfile
if not self._env_workflow:
self._env_workflow = case.get_env("workflow")

output_name = (
get_batch_script_for_job(
job, hidden=self._env_workflow.hidden_job(case, job)
)
if outfile is None
else outfile
)
logger.info("Creating file {}".format(output_name))
with open(output_name, "w") as fd:
fd.write(output_text)
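Several hunks in this file replace a per-call case.get_env("workflow") lookup with a cached self._env_workflow attribute that is populated on first use. A minimal sketch of the pattern (the class and method names here are illustrative, not part of the diff):

class EnvBatchLike:
    def __init__(self):
        # Mirrors the __init__ hunk above: no workflow env object until needed.
        self._env_workflow = None

    def _workflow(self, case):
        # Look the object up once, then reuse it across get_job_overrides,
        # make_batch_script, set_job_defaults, submit_jobs, and so on.
        if not self._env_workflow:
            self._env_workflow = case.get_env("workflow")
        return self._env_workflow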
@@ -274,8 +286,10 @@ def set_job_defaults(self, batch_jobs, case):

if self._batchtype == "none":
return
env_workflow = case.get_env("workflow")
known_jobs = env_workflow.get_jobs()

if not self._env_workflow:
self._env_workflow = case.get_env("workflow")
known_jobs = self._env_workflow.get_jobs()

for job, jsect in batch_jobs:
if job not in known_jobs:
@@ -432,11 +446,13 @@ def set_job_defaults(self, batch_jobs, case):
seconds = convert_to_seconds(walltime)
full_bab_time = convert_to_babylonian_time(seconds)
walltime = format_time(walltime_format, "%H:%M:%S", full_bab_time)
if not self._env_workflow:
self._env_workflow = case.get_env("workflow")

env_workflow.set_value(
self._env_workflow.set_value(
"JOB_QUEUE", self.text(queue), subgroup=job, ignore_type=False
)
env_workflow.set_value("JOB_WALLCLOCK_TIME", walltime, subgroup=job)
self._env_workflow.set_value("JOB_WALLCLOCK_TIME", walltime, subgroup=job)
logger.debug(
"Job {} queue {} walltime {}".format(job, self.text(queue), walltime)
)
@@ -739,13 +755,22 @@ def submit_jobs(
waiting to resubmit at the end of the first sequence
workflow is a logical indicating whether only "job" is submitted or the workflow sequence starting with "job" is submitted
"""
env_workflow = case.get_env("workflow")

external_workflow = case.get_value("EXTERNAL_WORKFLOW")
alljobs = env_workflow.get_jobs()
if not self._env_workflow:
self._env_workflow = case.get_env("workflow")
alljobs = self._env_workflow.get_jobs()
alljobs = [
j
for j in alljobs
if os.path.isfile(os.path.join(self._caseroot, get_batch_script_for_job(j)))
if os.path.isfile(
os.path.join(
self._caseroot,
get_batch_script_for_job(
j, hidden=self._env_workflow.hidden_job(case, j)
),
)
)
]

startindex = 0
@@ -761,7 +786,9 @@ def submit_jobs(
if index < startindex:
continue
try:
prereq = env_workflow.get_value("prereq", subgroup=job, resolved=False)
prereq = self._env_workflow.get_value(
"prereq", subgroup=job, resolved=False
)
if (
external_workflow
or prereq is None
@@ -780,7 +807,9 @@
),
)
if prereq:
jobs.append((job, env_workflow.get_value("dependency", subgroup=job)))
jobs.append(
(job, self._env_workflow.get_value("dependency", subgroup=job))
)

if self._batchtype == "cobalt":
break
@@ -1065,28 +1094,44 @@ def _submit_single_job(
set_continue_run=resubmit_immediate,
submit_resubmits=workflow and not resubmit_immediate,
)

if batch_system == "lsf" and not batch_env_flag:
sequence = (
run_args,
batchsubmit,
submitargs,
batchredirect,
get_batch_script_for_job(job),
get_batch_script_for_job(
job,
hidden=self._env_workflow.hidden_job(case, job),
),
)
elif batch_env_flag:
sequence = (
batchsubmit,
submitargs,
run_args,
batchredirect,
os.path.join(self._caseroot, get_batch_script_for_job(job)),
os.path.join(
self._caseroot,
get_batch_script_for_job(
job,
hidden=self._env_workflow.hidden_job(case, job),
),
),
)
else:
sequence = (
batchsubmit,
submitargs,
batchredirect,
os.path.join(self._caseroot, get_batch_script_for_job(job)),
os.path.join(
self._caseroot,
get_batch_script_for_job(
job,
hidden=self._env_workflow.hidden_job(case, job),
),
),
run_args,
)

@@ -1377,12 +1422,13 @@ def compare_xml(self, other):

def make_all_batch_files(self, case):
machdir = case.get_value("MACHDIR")
env_workflow = case.get_env("workflow")
logger.info("Creating batch scripts")
jobs = env_workflow.get_jobs()
if not self._env_workflow:
self._env_workflow = case.get_env("workflow")
jobs = self._env_workflow.get_jobs()
for job in jobs:
template = case.get_resolved_value(
env_workflow.get_value("template", subgroup=job)
self._env_workflow.get_value("template", subgroup=job)
)
if os.path.isabs(template):
input_batch_script = template
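The recurring change in env_batch.py is passing hidden=self._env_workflow.hidden_job(case, job) into get_batch_script_for_job. That helper lives in CIME.utils and is not part of this diff; a hedged sketch of the presumed effect, based on CIME's convention of dot-prefixed batch scripts such as .case.run, is:

# Assumption: the real CIME.utils implementation may differ in detail.
def get_batch_script_for_job(job, hidden=True):
    # Hidden jobs keep the conventional leading-dot name; non-hidden jobs
    # get a plain, visible file name in the case directory.
    return "." + job if hidden else job

print(get_batch_script_for_job("case.run", hidden=True))          # .case.run
print(get_batch_script_for_job("case.st_archive", hidden=False))  # case.st_archive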
12 changes: 12 additions & 0 deletions CIME/XML/env_workflow.py
@@ -5,6 +5,7 @@
from CIME.XML.standard_module_setup import *
from CIME.XML.env_base import EnvBase
from CIME.utils import get_cime_root

import re, math

logger = logging.getLogger(__name__)
@@ -21,6 +22,7 @@ def __init__(self, case_root=None, infile="env_workflow.xml", read_only=False):
# schema = os.path.join(get_cime_root(), "CIME", "config", "xml_schemas", "env_workflow.xsd")
# TODO: define schema for this file
schema = None
self._hidden = {}
super(EnvWorkflow, self).__init__(
case_root, infile, schema=schema, read_only=read_only
)
@@ -89,7 +91,17 @@ def get_type_info(self, vid):
)
return type_info

def hidden_job(self, case, job):
if job not in self._hidden:
self.get_job_specs(case, job)
return self._hidden[job]

def get_job_specs(self, case, job):
hidden = self.get_value("hidden", subgroup=job)
self._hidden[job] = (hidden is None and job != "case.st_archive") or (
hidden is not None and hidden.lower() == "true"
)

task_count = case.get_resolved_value(self.get_value("task_count", subgroup=job))
tasks_per_node = case.get_resolved_value(
self.get_value("tasks_per_node", subgroup=job)
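The hidden_job/get_job_specs additions above default every job except case.st_archive to hidden unless the workflow XML sets hidden explicitly. A minimal standalone sketch of that boolean expression (the job names are just the ones visible in the diff):

def is_hidden(hidden, job):
    # Same expression as the get_job_specs() hunk above.
    return (hidden is None and job != "case.st_archive") or (
        hidden is not None and hidden.lower() == "true"
    )

print(is_hidden(None, "case.run"))           # True  -> hidden by default
print(is_hidden(None, "case.st_archive"))    # False -> visible by default
print(is_hidden("false", "case.run"))        # False -> explicit XML override
print(is_hidden("true", "case.st_archive"))  # True  -> explicit XML override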
2 changes: 1 addition & 1 deletion CIME/case/case.py
@@ -1234,7 +1234,7 @@ def _setup_mach_pes(self, pecount, multi_driver, ninst, machine_name, mpilib):
and value.endswith("'")
):
value = value[1:-1]
if append[key]:
if key in append and append[key]:
ovalue = self.get_value(key)

self.set_value(key, value + " " + ovalue)
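The case.py change above guards the dictionary lookup so a key that never made it into append no longer raises KeyError. A minimal sketch with hypothetical contents:

append = {"CAM_CONFIG_OPTS": True}  # hypothetical bookkeeping dict
key = "OCN_NCPL"                    # hypothetical key absent from append

# Old check: `if append[key]:` raises KeyError for the missing key.
# New check: falls through safely and the value is set without appending.
if key in append and append[key]:
    print("append to the existing value")
else:
    print("set the value directly")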
2 changes: 2 additions & 0 deletions CIME/data/config/xml_schemas/config_workflow.xsd
@@ -13,6 +13,7 @@

<!-- simple elements -->
<xs:element name="template" type="xs:anyURI"/>
<xs:element name="hidden" type="xs:string"/>
<xs:element name="task_count" type="xs:string"/>
<xs:element name="tasks_per_node" type="xs:string"/>
<xs:element name="walltime" type="xs:string"/>
@@ -57,6 +58,7 @@
<xs:complexType>
<xs:sequence>
<xs:element ref="template"/>
<xs:element ref="hidden" minOccurs="0"/>
<xs:element minOccurs="0" ref="dependency"/>
<xs:element ref="prereq"/>
<xs:element ref="runtime_parameters" minOccurs="0" maxOccurs="unbounded"/>
10 changes: 10 additions & 0 deletions CIME/scripts/create_test.py
@@ -260,6 +260,11 @@ def parse_command_line(args, description):
"\nNOTE: this can also be done after the fact with bless_test_results",
)

parser.add_argument(
"--driver",
help="Override driver specified in tests and use this one.",
)

default = get_default_setting(config, "COMPILER", None, check_main=True)

parser.add_argument(
@@ -775,6 +780,7 @@ def parse_command_line(args, description):
args.workflow,
args.chksum,
args.force_rebuild,
args.driver,
)


@@ -936,6 +942,7 @@ def create_test(
workflow,
chksum,
force_rebuild,
driver,
):
###############################################################################
impl = TestScheduler(
@@ -977,6 +984,7 @@
workflow=workflow,
chksum=chksum,
force_rebuild=force_rebuild,
driver=driver,
)

success = impl.run_tests(
@@ -1081,6 +1089,7 @@ def _main_func(description=None):
workflow,
chksum,
force_rebuild,
driver,
) = parse_command_line(sys.argv, description)

success = False
@@ -1134,6 +1143,7 @@
workflow,
chksum,
force_rebuild,
driver,
)
run_count += 1

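The new --driver option is parsed in parse_command_line, threaded through create_test, and handed to the TestScheduler constructor as driver=driver, so one flag overrides the driver encoded in each test name. A hypothetical invocation (test and driver names are illustrative, not from this diff):

create_test SMS.f19_g16.A --driver nuopc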