Skip to content

Commit

Permalink
undo merge and rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
jedwards4b committed Oct 22, 2024
1 parent ffe42e2 commit a320d61
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 138 deletions.
13 changes: 6 additions & 7 deletions CIME/SystemTests/err.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import glob, os
from CIME.XML.standard_module_setup import *
from CIME.SystemTests.restart_tests import RestartTest
from CIME.utils import ls_sorted_by_mtime, safe_copy
from CIME.utils import safe_copy

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -35,12 +35,11 @@ def _case_two_setup(self):

def _case_two_custom_prerun_action(self):
dout_s_root = self._case1.get_value("DOUT_S_ROOT")
rest_root = os.path.abspath(os.path.join(dout_s_root, "rest"))
restart_list = ls_sorted_by_mtime(rest_root)
expect(len(restart_list) >= 1, "No restart files found in {}".format(rest_root))
self._case.restore_from_archive(
rest_dir=os.path.join(rest_root, restart_list[0])
)
self._drv_restart_pointer = self._case2.get_value("DRV_RESTART_POINTER")
resttime = self._drv_restart_pointer[-16:]
rest_root = os.path.abspath(os.path.join(dout_s_root, "rest", resttime))
expect(os.path.isdir(rest_root), "No such directory {}".format(rest_root))
self._case.restore_from_archive(rest_dir=rest_root)

def _case_two_custom_postrun_action(self):
# Link back to original case1 name
Expand Down
96 changes: 16 additions & 80 deletions CIME/case/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,9 +1301,6 @@ def configure(
non_local=False,
extra_machines_dir=None,
case_group=None,
ngpus_per_node=0,
gpu_type=None,
gpu_offload=None,
):
expect(
check_name(compset_name, additional_chars="."),
Expand Down Expand Up @@ -1561,64 +1558,6 @@ def configure(
if test:
self.set_value("TEST", True)

# ----------------------------------------------------------------------------------------------------------
# Sanity check for a GPU run:
# 1. GPU_TYPE and GPU_OFFLOAD must both be defined to use GPUS
# 2. if ngpus_per_node argument is larger than the value of MAX_GPUS_PER_NODE, the NGPUS_PER_NODE
# XML variable in the env_mach_pes.xml file would be set to MAX_GPUS_PER_NODE automatically.
# 3. if ngpus-per-node argument is equal to 0, it will be updated to 1 automatically.
# ----------------------------------------------------------------------------------------------------------
max_gpus_per_node = self.get_value("MAX_GPUS_PER_NODE")
if gpu_type and str(gpu_type).lower() != "none":
expect(
max_gpus_per_node,
f"GPUS are not defined for machine={machine_name} and compiler={compiler}",
)
expect(
gpu_offload,
"Both gpu-type and gpu-offload must be defined if either is defined",
)
expect(
compiler in ["nvhpc", "cray"],
f"Only nvhpc and cray compilers are expected for a GPU run; the user given compiler is {compiler}, ",
)
valid_gpu_type = self.get_value("GPU_TYPE").split(",")
valid_gpu_type.remove("none")
expect(
gpu_type in valid_gpu_type,
f"Unsupported GPU type is given: {gpu_type} ; valid values are {valid_gpu_type}",
)
valid_gpu_offload = self.get_value("GPU_OFFLOAD").split(",")
valid_gpu_offload.remove("none")
expect(
gpu_offload in valid_gpu_offload,
f"Unsupported GPU programming model is given: {gpu_offload} ; valid values are {valid_gpu_offload}",
)
self.gpu_enabled = True
if ngpus_per_node >= 0:
self.set_value(
"NGPUS_PER_NODE",
max(1, ngpus_per_node)
if ngpus_per_node <= max_gpus_per_node
else max_gpus_per_node,
)
elif gpu_offload and str(gpu_offload).lower() != "none":
expect(
False,
"Both gpu-type and gpu-offload must be defined if either is defined",
)
elif ngpus_per_node != 0:
expect(
False,
f"ngpus_per_node is expected to be 0 for a pure CPU run ; {ngpus_per_node} is provided instead ;",
)

# Set these two GPU XML variables here to overwrite the default values
# Only set them for "cesm" model
if self._cime_model == "cesm":
self.set_value("GPU_TYPE", str(gpu_type).lower())
self.set_value("GPU_OFFLOAD", str(gpu_offload).lower())

self.initialize_derived_attributes()

# --------------------------------------------
Expand Down Expand Up @@ -1907,13 +1846,15 @@ def create_caseroot(self, clone=False):
component_class in self._component_description
and len(self._component_description[component_class]) > 0
):
append_status(
"Component {} is {}".format(
component_class, self._component_description[component_class]
),
"README.case",
caseroot=self._caseroot,
)
if "Stub" not in self._component_description[component_class]:
append_status(
"Component {} is {}".format(
component_class,
self._component_description[component_class],
),
"README.case",
caseroot=self._caseroot,
)
if component_class == "CPL":
append_status(
"Using %s coupler instances" % (self.get_value("NINST_CPL")),
Expand All @@ -1922,12 +1863,13 @@ def create_caseroot(self, clone=False):
)
continue
comp_grid = "{}_GRID".format(component_class)

append_status(
"{} is {}".format(comp_grid, self.get_value(comp_grid)),
"README.case",
caseroot=self._caseroot,
)
grid_val = self.get_value(comp_grid)
if grid_val != "null":
append_status(
"{} is {}".format(comp_grid, self.get_value(comp_grid)),
"README.case",
caseroot=self._caseroot,
)
comp = str(self.get_value("COMP_{}".format(component_class)))
user_mods = self._get_comp_user_mods(comp)
if user_mods is not None:
Expand Down Expand Up @@ -2440,9 +2382,6 @@ def create(
non_local=False,
extra_machines_dir=None,
case_group=None,
ngpus_per_node=0,
gpu_type=None,
gpu_offload=None,
):
try:
# Set values for env_case.xml
Expand Down Expand Up @@ -2515,9 +2454,6 @@ def create(
non_local=non_local,
extra_machines_dir=extra_machines_dir,
case_group=case_group,
ngpus_per_node=ngpus_per_node,
gpu_type=gpu_type,
gpu_offload=gpu_offload,
)

self.create_caseroot()
Expand Down
127 changes: 76 additions & 51 deletions CIME/case/case_st_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,60 +168,84 @@ def _archive_rpointer_files(
datename_is_last,
):
###############################################################################

if datename_is_last:
# Copy of all rpointer files for latest restart date
rpointers = glob.glob(os.path.join(rundir, "rpointer.*"))
for rpointer in rpointers:
safe_copy(
rpointer, os.path.join(archive_restdir, os.path.basename(rpointer))
)
rpointers = glob.glob(
os.path.join(rundir, "rpointer.*" + _datetime_str(datename))
)
# If timestamped rpointers exist use them
if rpointers:
for rpointer in rpointers:
safe_copy(
rpointer, os.path.join(archive_restdir, os.path.basename(rpointer))
)
else:
# Copy of all rpointer files for latest restart date
rpointers = glob.glob(os.path.join(rundir, "rpointer.*"))
for rpointer in rpointers:
safe_copy(
rpointer, os.path.join(archive_restdir, os.path.basename(rpointer))
)
else:
# Generate rpointer file(s) for interim restarts for the one datename and each
# possible value of ninst_strings
if save_interim_restart_files:
# parse env_archive.xml to determine the rpointer files
# and contents for the given archive_entry tag
rpointer_items = archive.get_rpointer_contents(archive_entry)

# loop through the possible rpointer files and contents
for rpointer_file, rpointer_content in rpointer_items:
temp_rpointer_file = rpointer_file
temp_rpointer_content = rpointer_content

# put in a temporary setting for ninst_strings if they are empty
# in order to have just one loop over ninst_strings below
if rpointer_content != "unset":
if not ninst_strings:
ninst_strings = ["empty"]

for ninst_string in ninst_strings:
rpointer_file = temp_rpointer_file
rpointer_content = temp_rpointer_content
if ninst_string == "empty":
ninst_string = ""
for key, value in [
("$CASE", casename),
("$DATENAME", _datetime_str(datename)),
("$MPAS_DATENAME", _datetime_str_mpas(datename)),
("$NINST_STRING", ninst_string),
]:
rpointer_file = rpointer_file.replace(key, value)
rpointer_content = rpointer_content.replace(key, value)

# write out the respective files with the correct contents
rpointer_file = os.path.join(archive_restdir, rpointer_file)
logger.info("writing rpointer_file {}".format(rpointer_file))
f = open(rpointer_file, "w")
for output in rpointer_content.split(","):
f.write("{} \n".format(output))
f.close()
else:
logger.info(
"rpointer_content unset, not creating rpointer file {}".format(
rpointer_file
)
rpointers = glob.glob(
os.path.join(rundir, "rpointer.*" + _datetime_str(datename))
)
# If timestamped rpointers exist use them
if rpointers:
for rpointer in rpointers:
safe_copy(
rpointer,
os.path.join(archive_restdir, os.path.basename(rpointer)),
)
else:
# parse env_archive.xml to determine the rpointer files
# and contents for the given archive_entry tag
rpointer_items = archive.get_rpointer_contents(archive_entry)

# loop through the possible rpointer files and contents
for rpointer_file, rpointer_content in rpointer_items:
temp_rpointer_file = rpointer_file
temp_rpointer_content = rpointer_content

# put in a temporary setting for ninst_strings if they are empty
# in order to have just one loop over ninst_strings below
if rpointer_content != "unset":
if not ninst_strings:
ninst_strings = ["empty"]

for ninst_string in ninst_strings:
rpointer_file = temp_rpointer_file
rpointer_content = temp_rpointer_content
if ninst_string == "empty":
ninst_string = ""
for key, value in [
("$CASE", casename),
("$DATENAME", _datetime_str(datename)),
("$MPAS_DATENAME", _datetime_str_mpas(datename)),
("$NINST_STRING", ninst_string),
]:
rpointer_file = rpointer_file.replace(key, value)
rpointer_content = rpointer_content.replace(key, value)

# write out the respective files with the correct contents
rpointer_file = os.path.join(
archive_restdir, rpointer_file
)
logger.info(
"writing rpointer_file {}".format(rpointer_file)
)
f = open(rpointer_file, "w")
for output in rpointer_content.split(","):
f.write("{} \n".format(output))
f.close()
else:
logger.info(
"rpointer_content unset, not creating rpointer file {}".format(
rpointer_file
)
)


###############################################################################
Expand Down Expand Up @@ -501,9 +525,10 @@ def _archive_restarts_date_comp(
"""
datename_str = _datetime_str(datename)

if datename_is_last or case.get_value("DOUT_S_SAVE_INTERIM_RESTART_FILES"):
if not os.path.exists(archive_restdir):
os.makedirs(archive_restdir)
if (
datename_is_last or case.get_value("DOUT_S_SAVE_INTERIM_RESTART_FILES")
) and not os.path.isdir(archive_restdir):
os.makedirs(archive_restdir)

# archive the rpointer file(s) for this datename and all possible ninst_strings
_archive_rpointer_files(
Expand Down

0 comments on commit a320d61

Please sign in to comment.