Skip to content

Commit

Permalink
Merge pull request #4690 from sjsprecious/use_xml_for_gpu_config
Browse files Browse the repository at this point in the history
Remove GPU options from the Python workflow and move them to XML files for CESM
  • Loading branch information
jedwards4b authored Oct 7, 2024
2 parents a7bd4df + 246eea7 commit 62a9b17
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 154 deletions.
46 changes: 29 additions & 17 deletions CIME/XML/env_mach_pes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ def get_value(
attribute=None,
resolved=True,
subgroup=None,
max_mpitasks_per_node=None,
max_cputasks_per_gpu_node=None,
ngpus_per_node=None,
): # pylint: disable=arguments-differ
# Special variable NINST_MAX is used to determine the number of
# drivers in multi-driver mode.
Expand All @@ -58,12 +55,9 @@ def get_value(
value = EnvBase.get_value(self, vid, attribute, resolved, subgroup)

if "NTASKS" in vid or "ROOTPE" in vid:
if max_mpitasks_per_node is None:
max_mpitasks_per_node = self.get_value("MAX_MPITASKS_PER_NODE")
if max_cputasks_per_gpu_node is None:
max_cputasks_per_gpu_node = self.get_value("MAX_CPUTASKS_PER_GPU_NODE")
if ngpus_per_node is None:
ngpus_per_node = self.get_value("NGPUS_PER_NODE")
max_mpitasks_per_node = self.get_value("MAX_MPITASKS_PER_NODE")
max_cputasks_per_gpu_node = self.get_value("MAX_CPUTASKS_PER_GPU_NODE")
ngpus_per_node = self.get_value("NGPUS_PER_NODE")
if (ngpus_per_node and value) and value < 0:
value = -1 * value * max_cputasks_per_gpu_node
elif value and value < 0:
Expand Down Expand Up @@ -176,18 +170,29 @@ def get_tasks_per_node(self, total_tasks, max_thread_count):
"totaltasks > 0 expected, totaltasks = {}".format(total_tasks),
)
if self._comp_interface == "nuopc" and self.get_value("ESMF_AWARE_THREADING"):
if self.get_value("NGPUS_PER_NODE") > 0:
tasks_per_node = self.get_value("MAX_CPUTASKS_PER_GPU_NODE")
ngpus_per_node = self.get_value("NGPUS_PER_NODE")
if ngpus_per_node and ngpus_per_node > 0:
if self.get_value("OVERSUBSCRIBE_GPU"):
tasks_per_node = self.get_value("MAX_CPUTASKS_PER_GPU_NODE")
else:
tasks_per_node = self.get_value("NGPUS_PER_NODE")
else:
tasks_per_node = self.get_value("MAX_MPITASKS_PER_NODE")
else:
ngpus_per_node = self.get_value("NGPUS_PER_NODE")
if ngpus_per_node and ngpus_per_node > 0:
tasks_per_node = min(
self.get_value("MAX_TASKS_PER_NODE") // max_thread_count,
self.get_value("MAX_CPUTASKS_PER_GPU_NODE"),
total_tasks,
)
if self.get_value("OVERSUBSCRIBE_GPU"):
tasks_per_node = min(
self.get_value("MAX_TASKS_PER_NODE") // max_thread_count,
self.get_value("MAX_CPUTASKS_PER_GPU_NODE"),
total_tasks,
)
else:
tasks_per_node = min(
self.get_value("MAX_TASKS_PER_NODE") // max_thread_count,
self.get_value("NGPUS_PER_NODE"),
total_tasks,
)
else:
tasks_per_node = min(
self.get_value("MAX_TASKS_PER_NODE") // max_thread_count,
Expand All @@ -204,7 +209,14 @@ def get_total_nodes(self, total_tasks, max_thread_count):
if self._comp_interface == "nuopc" and self.get_value("ESMF_AWARE_THREADING"):
max_thread_count = 1
tasks_per_node = self.get_tasks_per_node(total_tasks, max_thread_count)
num_nodes = int(math.ceil(float(total_tasks) / tasks_per_node))
if self.get_value("OVERSUBSCRIBE_GPU"):
num_nodes = int(math.ceil(float(total_tasks) / tasks_per_node))
else:
ngpus_per_node = self.get_value("NGPUS_PER_NODE")
if ngpus_per_node and ngpus_per_node > 0:
num_nodes = int(math.ceil(float(total_tasks) / ngpus_per_node))
else:
num_nodes = int(math.ceil(float(total_tasks) / tasks_per_node))
return num_nodes, self.get_spare_nodes(num_nodes)

def get_spare_nodes(self, num_nodes):
Expand Down
16 changes: 4 additions & 12 deletions CIME/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,18 +247,10 @@ def get_standard_cmake_args(case, sharedpath):
)
# check settings for GPU
gpu_type = case.get_value("GPU_TYPE")
gpu_offload = case.get_value("GPU_OFFLOAD")
if gpu_type != "none":
expect(
gpu_offload != "none",
"Both GPU_TYPE and GPU_OFFLOAD must be defined if either is",
)
cmake_args += f" -DGPU_TYPE={gpu_type} -DGPU_OFFLOAD={gpu_offload}"
else:
expect(
gpu_offload == "none",
"Both GPU_TYPE and GPU_OFFLOAD must be defined if either is",
)
openacc_gpu_offload = case.get_value("OPENACC_GPU_OFFLOAD")
openmp_gpu_offload = case.get_value("OPENMP_GPU_OFFLOAD")
kokkos_gpu_offload = case.get_value("KOKKOS_GPU_OFFLOAD")
cmake_args += f" -DGPU_TYPE={gpu_type} -DOPENACC_GPU_OFFLOAD={openacc_gpu_offload} -DOPENMP_GPU_OFFLOAD={openmp_gpu_offload} -DKOKKOS_GPU_OFFLOAD={kokkos_gpu_offload} "

ocn_model = case.get_value("COMP_OCN")
atm_dycore = case.get_value("CAM_DYCORE")
Expand Down
67 changes: 0 additions & 67 deletions CIME/case/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,9 +1301,6 @@ def configure(
non_local=False,
extra_machines_dir=None,
case_group=None,
ngpus_per_node=0,
gpu_type=None,
gpu_offload=None,
):
expect(
check_name(compset_name, additional_chars="."),
Expand Down Expand Up @@ -1561,64 +1558,6 @@ def configure(
if test:
self.set_value("TEST", True)

# ----------------------------------------------------------------------------------------------------------
# Sanity check for a GPU run:
# 1. GPU_TYPE and GPU_OFFLOAD must both be defined to use GPUS
# 2. if ngpus_per_node argument is larger than the value of MAX_GPUS_PER_NODE, the NGPUS_PER_NODE
# XML variable in the env_mach_pes.xml file would be set to MAX_GPUS_PER_NODE automatically.
# 3. if ngpus-per-node argument is equal to 0, it will be updated to 1 automatically.
# ----------------------------------------------------------------------------------------------------------
max_gpus_per_node = self.get_value("MAX_GPUS_PER_NODE")
if gpu_type and str(gpu_type).lower() != "none":
expect(
max_gpus_per_node,
f"GPUS are not defined for machine={machine_name} and compiler={compiler}",
)
expect(
gpu_offload,
"Both gpu-type and gpu-offload must be defined if either is defined",
)
expect(
compiler in ["nvhpc", "cray"],
f"Only nvhpc and cray compilers are expected for a GPU run; the user given compiler is {compiler}, ",
)
valid_gpu_type = self.get_value("GPU_TYPE").split(",")
valid_gpu_type.remove("none")
expect(
gpu_type in valid_gpu_type,
f"Unsupported GPU type is given: {gpu_type} ; valid values are {valid_gpu_type}",
)
valid_gpu_offload = self.get_value("GPU_OFFLOAD").split(",")
valid_gpu_offload.remove("none")
expect(
gpu_offload in valid_gpu_offload,
f"Unsupported GPU programming model is given: {gpu_offload} ; valid values are {valid_gpu_offload}",
)
self.gpu_enabled = True
if ngpus_per_node >= 0:
self.set_value(
"NGPUS_PER_NODE",
max(1, ngpus_per_node)
if ngpus_per_node <= max_gpus_per_node
else max_gpus_per_node,
)
elif gpu_offload and str(gpu_offload).lower() != "none":
expect(
False,
"Both gpu-type and gpu-offload must be defined if either is defined",
)
elif ngpus_per_node != 0:
expect(
False,
f"ngpus_per_node is expected to be 0 for a pure CPU run ; {ngpus_per_node} is provided instead ;",
)

# Set these two GPU XML variables here to overwrite the default values
# Only set them for "cesm" model
if self._cime_model == "cesm":
self.set_value("GPU_TYPE", str(gpu_type).lower())
self.set_value("GPU_OFFLOAD", str(gpu_offload).lower())

self.initialize_derived_attributes()

# --------------------------------------------
Expand Down Expand Up @@ -2440,9 +2379,6 @@ def create(
non_local=False,
extra_machines_dir=None,
case_group=None,
ngpus_per_node=0,
gpu_type=None,
gpu_offload=None,
):
try:
# Set values for env_case.xml
Expand Down Expand Up @@ -2515,9 +2451,6 @@ def create(
non_local=non_local,
extra_machines_dir=extra_machines_dir,
case_group=case_group,
ngpus_per_node=ngpus_per_node,
gpu_type=gpu_type,
gpu_offload=gpu_offload,
)

self.create_caseroot()
Expand Down
43 changes: 42 additions & 1 deletion CIME/case/case_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,48 @@ def _case_setup_impl(
+ case.iotasks,
)

# ----------------------------------------------------------------------------------------------------------
# Sanity check for a GPU run:
# 1. GPU_TYPE and at least one of OPENACC_GPU_OFFLOAD, OPENMP_GPU_OFFLOAD or KOKKOS_GPU_OFFLOAD must both be set to use GPUs
# 2. If the NGPUS_PER_NODE XML variable in the env_mach_pes.xml file is larger than
# the value of MAX_GPUS_PER_NODE, set it to MAX_GPUS_PER_NODE automatically.
# 3. If the NGPUS_PER_NODE XML variable is equal to 0, it will be updated to 1 automatically.
# ----------------------------------------------------------------------------------------------------------
max_gpus_per_node = case.get_value("MAX_GPUS_PER_NODE")
gpu_type = case.get_value("GPU_TYPE")
openacc_gpu_offload = case.get_value("OPENACC_GPU_OFFLOAD")
openmp_gpu_offload = case.get_value("OPENMP_GPU_OFFLOAD")
kokkos_gpu_offload = case.get_value("KOKKOS_GPU_OFFLOAD")
gpu_offload = (
openacc_gpu_offload or openmp_gpu_offload or kokkos_gpu_offload
)
ngpus_per_node = case.get_value("NGPUS_PER_NODE")
if gpu_type and str(gpu_type).lower() != "none":
if max_gpus_per_node <= 0:
raise RuntimeError(
f"MAX_GPUS_PER_NODE must be larger than 0 for machine={mach} and compiler={compiler} in order to configure a GPU run"
)
if not gpu_offload:
raise RuntimeError(
"GPU_TYPE is defined but none of the GPU OFFLOAD options are enabled"
)
case.gpu_enabled = True
if ngpus_per_node >= 0:
case.set_value(
"NGPUS_PER_NODE",
max(1, ngpus_per_node)
if ngpus_per_node <= max_gpus_per_node
else max_gpus_per_node,
)
elif gpu_offload:
raise RuntimeError(
"GPU_TYPE is not defined but at least one GPU OFFLOAD option is enabled"
)
elif ngpus_per_node and ngpus_per_node != 0:
raise RuntimeError(
f"ngpus_per_node is expected to be 0 for a pure CPU run ; {ngpus_per_node} is provided instead ;"
)

# May need to select new batch settings if pelayout changed (e.g. problem is now too big for prev-selected queue)
env_batch = case.get_env("batch")
env_batch.set_job_defaults([(case.get_primary_job(), {})], case)
Expand Down Expand Up @@ -527,7 +569,6 @@ def case_setup(self, clean=False, test_mode=False, reset=False, keep=None):


def _create_case_repo(self, caseroot):

self._gitinterface = GitInterface(caseroot, logger, branch=self.get_value("CASE"))
if self._gitinterface and not os.path.exists(os.path.join(caseroot, ".gitignore")):
safe_copy(
Expand Down
8 changes: 0 additions & 8 deletions CIME/data/config/xml_schemas/config_machines.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
<xs:attribute name="mpilib" type="xs:string"/>
<xs:attribute name="comp_interface" type="xs:string"/>
<xs:attribute name="gpu_type" type="xs:string"/>
<xs:attribute name="gpu_offload" type="xs:string"/>
<xs:attribute name="queue" type="xs:string"/>
<xs:attribute name="DEBUG" type="upperBoolean"/>
<xs:attribute name="PIO_VERSION" type="xs:integer"/>
Expand Down Expand Up @@ -59,8 +58,6 @@
<xs:element name="MAX_GPUS_PER_NODE" type="AttrElement"/>
<xs:element name="MAX_MPITASKS_PER_NODE" type="AttrElement"/>
<xs:element name="MAX_CPUTASKS_PER_GPU_NODE" type="AttrElement"/>
<xs:element name="GPU_TYPE" type="AttrElement"/>
<xs:element name="GPU_OFFLOAD" type="AttrElement"/>
<xs:element name="MPI_GPU_WRAPPER_SCRIPT" type="AttrElement"/>
<xs:element name="COSTPES_PER_NODE" type="xs:integer"/>
<xs:element name="PROJECT_REQUIRED" type="xs:NCName"/>
Expand Down Expand Up @@ -175,10 +172,6 @@
<!-- MAX_CPUTASKS_PER_GPU_NODE: number of physical PES per GPU node on
this machine, in practice the MPI tasks per node will not exceed this value -->
<xs:element ref="MAX_CPUTASKS_PER_GPU_NODE" minOccurs="0" maxOccurs="unbounded"/>
<!-- GPU_TYPE: the type of GPU hardware available on this machine -->
<xs:element ref="GPU_TYPE" minOccurs="0" maxOccurs="unbounded"/>
<!-- GPU_OFFLOAD: the GPU programming model used for GPU porting -->
<xs:element ref="GPU_OFFLOAD" minOccurs="0" maxOccurs="unbounded"/>
<!-- MPI_GPU_WRAPPER_SCRIPT: a wrapper script that will be attached to the MPI run
command and map different MPI ranks to different GPUs within the same node -->
<xs:element ref="MPI_GPU_WRAPPER_SCRIPT" minOccurs="0" maxOccurs="1"/>
Expand Down Expand Up @@ -265,7 +258,6 @@
<xs:attribute ref="PIO_VERSION"/>
<xs:attribute ref="mpilib"/>
<xs:attribute ref="comp_interface"/>
<xs:attribute ref="gpu_offload"/>
<xs:attribute ref="gpu_type"/>
</xs:complexType>
</xs:element>
Expand Down
11 changes: 10 additions & 1 deletion CIME/data/config/xml_schemas/entry_id_base.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
<!-- simple elements -->
<xs:element name="help" type="xs:string"/>
<xs:element name="default_value" type="xs:string"/>
<xs:element name="valid_values" type="xs:string"/>
<xs:element name="category" type="xs:string"/>
<xs:element name="header" type="xs:string"/>

Expand All @@ -28,6 +27,16 @@
</xs:complexType>
</xs:element>

<!-- valid_values: string content that may also carry arbitrary attributes;
the anyAttribute wildcard uses lax processing, so an attribute is validated
only when a declaration for it is available -->
<xs:element name="valid_values">
<xs:complexType>
<xs:simpleContent>
<xs:extension base="xs:string">
<xs:anyAttribute processContents="lax"/>
</xs:extension>
</xs:simpleContent>
</xs:complexType>
</xs:element>

<xs:element name="desc">
<xs:complexType mixed="true">
<xs:attribute ref="compset"/>
Expand Down
2 changes: 0 additions & 2 deletions CIME/data/config/xml_schemas/env_mach_specific.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
<xs:attribute name="mpilib" type="xs:string"/>
<xs:attribute name="comp_interface" type="xs:string"/>
<xs:attribute name="gpu_type" type="xs:string"/>
<xs:attribute name="gpu_offload" type="xs:string"/>
<xs:attribute name="BUILD_THREADED" type="xs:string"/>
<xs:attribute name="value" type="xs:string"/>
<xs:attribute name="unit_testing" type="xs:boolean"/>
Expand Down Expand Up @@ -105,7 +104,6 @@
<xs:attribute ref="mpilib"/>
<xs:attribute ref="comp_interface"/>
<xs:attribute ref="gpu_type"/>
<xs:attribute ref="gpu_offload"/>
</xs:complexType>
</xs:element>

Expand Down
28 changes: 0 additions & 28 deletions CIME/scripts/create_newcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,25 +264,6 @@ def parse_command_line(args, cimeroot, description):

parser.add_argument("--case-group", help="Add this case to a case group")

parser.add_argument(
"--ngpus-per-node",
default=0,
type=int,
help="Specify number of GPUs used for simulation. ",
)

parser.add_argument(
"--gpu-type",
default=None,
help="Specify type of GPU hardware - currently supported are v100, a100, mi250",
)

parser.add_argument(
"--gpu-offload",
default=None,
help="Specify gpu offload method - currently supported are openacc, openmp, combined",
)

args = CIME.utils.parse_args_and_handle_standard_logging_options(args, parser)

if args.srcroot is not None:
Expand Down Expand Up @@ -358,9 +339,6 @@ def parse_command_line(args, cimeroot, description):
args.non_local,
args.extra_machines_dir,
args.case_group,
args.ngpus_per_node,
args.gpu_type,
args.gpu_offload,
)


Expand Down Expand Up @@ -397,9 +375,6 @@ def _main_func(description=None):
non_local,
extra_machines_dir,
case_group,
ngpus_per_node,
gpu_type,
gpu_offload,
) = parse_command_line(sys.argv, cimeroot, description)

if script_root is None:
Expand Down Expand Up @@ -464,9 +439,6 @@ def _main_func(description=None):
non_local=non_local,
extra_machines_dir=extra_machines_dir,
case_group=case_group,
ngpus_per_node=ngpus_per_node,
gpu_type=gpu_type,
gpu_offload=gpu_offload,
)

# Called after create since casedir does not exist yet
Expand Down
Loading

0 comments on commit 62a9b17

Please sign in to comment.