Skip to content

Commit

Permalink
Merge remote-tracking branch 'C5/gaeac6' into feature/datms2sw
Browse files Browse the repository at this point in the history
  • Loading branch information
DeniseWorthen committed Oct 18, 2024
2 parents 58f69c6 + 402c05b commit 46edda9
Show file tree
Hide file tree
Showing 26 changed files with 339 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ help([[
on the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea ]===])
whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C5]===])

prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0"
stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0"
load(pathJoin("stack-intel", stack_intel_ver))

stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.25"
stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.28"
load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver))

stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
Expand All @@ -30,4 +30,4 @@ unload("cray-libsci")
setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
setenv("CMAKE_Platform","gaea.intel")
setenv("CMAKE_Platform","gaeac5.intel")
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
help([[
This module loads libraries required for building and running UFS Weather Model
on the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0.
on the NOAA RDHPC machine Gaea C5 using Intel-2023.2.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea ]===])

prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/upp-addon-env/install/modulefiles/Core")

stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0"
stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0"
load(pathJoin("stack-intel", stack_intel_ver))

stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.25"
stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.28"
load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver))

stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
Expand All @@ -27,8 +27,8 @@ load(pathJoin("nccmp", nccmp_ver))
unload("darshan-runtime")
unload("cray-libsci")

unload("intel-classic/2023.1.0")
load("intel-oneapi/2023.1.0")
unload("intel-classic/2023.2.0")
load("intel-oneapi/2023.2.0")

setenv("I_MPI_CC", "icx")
setenv("I_MPI_CXX", "icpx")
Expand All @@ -37,4 +37,4 @@ setenv("I_MPI_F90", "ifx")
setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
setenv("CMAKE_Platform","gaea.intelllvm")
setenv("CMAKE_Platform","gaeac5.intelllvm")
33 changes: 33 additions & 0 deletions modulefiles/ufs_gaeac6.intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
-- Lmod modulefile for the UFS Weather Model environment on Gaea C6 (Intel).
-- Each *_ver value below is taken from the environment variable of the same
-- name when that variable is set; otherwise the literal default is used.
help([[
This module loads libraries required for building and running UFS Weather Model
on the NOAA RDHPC machine Gaea C6 using Intel-2023.2.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C6]===])

-- Put the C6 spack-stack 1.6.0 "fms-2024.01" Core modulefiles at the front of MODULEPATH
-- so the stack-* modules loaded below can be found.
prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

-- Compiler stack module.
stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0"
load(pathJoin("stack-intel", stack_intel_ver))

-- MPI stack module.
stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.29"
load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver))

-- Python stack module.
stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
load(pathJoin("stack-python", stack_python_ver))

cmake_ver=os.getenv("cmake_ver") or "3.23.1"
load(pathJoin("cmake", cmake_ver))

-- Shared module list defined in a separate "ufs_common" modulefile (not shown here).
load("ufs_common")

nccmp_ver=os.getenv("nccmp_ver") or "1.9.0.1"
load(pathJoin("nccmp", nccmp_ver))

-- NOTE(review): the reason these two modules are removed is not stated in this file;
-- presumably they conflict with the stack loaded above -- confirm.
unload("darshan-runtime")
unload("cray-libsci")

-- Compiler commands exported for the build.
-- NOTE(review): cc/CC/ftn look like the Cray compiler wrappers -- confirm.
setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
-- Platform identifier for this machine/compiler pair; presumably read by the CMake build -- verify.
setenv("CMAKE_Platform","gaeac6.intel")
40 changes: 40 additions & 0 deletions modulefiles/ufs_gaeac6.intelllvm.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Lmod modulefile for the UFS Weather Model environment on Gaea C6 (Intel LLVM variant).
-- Each *_ver value below is taken from the environment variable of the same
-- name when that variable is set; otherwise the literal default is used.
help([[
This module loads libraries required for building and running UFS Weather Model
on the NOAA RDHPC machine Gaea C6 using Intel-2023.2.0.
]])

-- The description names the machine explicitly ("Gaea C6") so it matches the
-- sibling ufs_gaeac6.intel modulefile and can be told apart from the C5 modules.
whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C6]===])

-- Put the C6 spack-stack 1.6.0 "fms-2024.01" Core modulefiles at the front of MODULEPATH
-- so the stack-* modules loaded below can be found.
prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

-- Compiler stack module.
stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0"
load(pathJoin("stack-intel", stack_intel_ver))

-- MPI stack module.
stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.29"
load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver))

-- Python stack module.
stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
load(pathJoin("stack-python", stack_python_ver))

cmake_ver=os.getenv("cmake_ver") or "3.23.1"
load(pathJoin("cmake", cmake_ver))

-- Shared module list defined in a separate "ufs_common" modulefile (not shown here).
load("ufs_common")

nccmp_ver=os.getenv("nccmp_ver") or "1.9.0.1"
load(pathJoin("nccmp", nccmp_ver))

-- NOTE(review): the reason these two modules are removed is not stated in this file;
-- presumably they conflict with the stack loaded above -- confirm.
unload("darshan-runtime")
unload("cray-libsci")

-- Swap the classic Intel compiler module for the oneAPI one.
-- NOTE(review): the version here is hard-coded and does not follow stack_intel_ver;
-- overriding stack_intel_ver alone leaves these two lines at 2023.2.0 -- confirm intended.
unload("intel-classic/2023.2.0")
load("intel-oneapi/2023.2.0")

-- Compilers requested from the Intel MPI wrappers.
-- NOTE(review): icx/icpx/ifx are presumably the LLVM-based Intel compilers -- confirm.
setenv("I_MPI_CC", "icx")
setenv("I_MPI_CXX", "icpx")
setenv("I_MPI_F90", "ifx")

-- Compiler commands exported for the build.
-- NOTE(review): cc/CC/ftn look like the Cray compiler wrappers -- confirm.
setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
-- Platform identifier for this machine/compiler pair; presumably read by the CMake build -- verify.
setenv("CMAKE_Platform","gaeac6.intelllvm")
16 changes: 4 additions & 12 deletions tests/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ case ${MACHINE_ID} in
;;
*)
# Activate lua environment for gaea c5 and c6
if [[ ${MACHINE_ID} == gaea ]]; then
if [[ ${MACHINE_ID} == gaeac5 ]]; then
module reset
fi
if [[ ${MACHINE_ID} == gaeac6 ]]; then
module reset
fi
# Load fv3 module
Expand Down Expand Up @@ -94,17 +97,6 @@ SUITES=$(grep -Po "\-DCCPP_SUITES=\K[^ ]*" <<< "${MAKE_OPT}")
export SUITES
set -ex

# Valid applications
if [[ ${MACHINE_ID} != gaea ]] || [[ ${RT_COMPILER} != intelllvm ]]; then # skip MOM6SOLO on gaea with intelllvm
if [[ "${MAKE_OPT}" == *"-DAPP=S2S"* ]]; then
CMAKE_FLAGS+=" -DMOM6SOLO=ON"
fi

if [[ "${MAKE_OPT}" == *"-DAPP=NG-GODAS"* ]]; then
CMAKE_FLAGS+=" -DMOM6SOLO=ON"
fi
fi

CMAKE_FLAGS=$(set -e; trim "${CMAKE_FLAGS}")
echo "CMAKE_FLAGS = ${CMAKE_FLAGS}"

Expand Down
22 changes: 21 additions & 1 deletion tests/default_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ elif [[ ${MACHINE_ID} = s4 ]]; then
export ICE_tasks_cpl_bmrk=48
export WAV_tasks_cpl_bmrk=80

elif [[ ${MACHINE_ID} = gaea ]]; then
elif [[ ${MACHINE_ID} = gaeac5 ]]; then

export TPN=128

Expand All @@ -323,6 +323,26 @@ elif [[ ${MACHINE_ID} = gaea ]]; then
export WPG_cpl_atmw_gdas=24
export WAV_tasks_atmw_gdas=264

elif [[ ${MACHINE_ID} = gaeac6 ]]; then

export TPN=192

export INPES_dflt=3
export JNPES_dflt=8
export INPES_thrd=3
export JNPES_thrd=4
export INPES_c384=6
export JNPES_c384=8
export THRD_c384=1
export INPES_c768=8
export JNPES_c768=16
export THRD_c768=2

export THRD_cpl_atmw_gdas=3
export INPES_cpl_atmw_gdas=6
export JNPES_cpl_atmw_gdas=8
export WPG_cpl_atmw_gdas=24
export WAV_tasks_atmw_gdas=264
elif [[ ${MACHINE_ID} = derecho ]]; then

export TPN=128
Expand Down
15 changes: 10 additions & 5 deletions tests/detect_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ case $(hostname -f) in
dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9
dlogin10.dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood10

gaea5[1-8]) MACHINE_ID=gaea ;; ### gaea51-58
gaea5[1-8].ncrc.gov) MACHINE_ID=gaea ;; ### gaea51-58
gaea5[1-8]) MACHINE_ID=gaeac5 ;; ### gaea51-58
gaea5[1-8].ncrc.gov) MACHINE_ID=gaeac5 ;; ### gaea51-58
gaea6[1-8]) MACHINE_ID=gaeac6 ;; ### gaea61-68
gaea6[1-8].ncrc.gov) MACHINE_ID=gaeac6 ;; ### gaea61-68

hfe0[1-9]) MACHINE_ID=hera ;; ### hera01-09
hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12
Expand Down Expand Up @@ -94,9 +96,12 @@ elif [[ -d /work ]]; then
else
MACHINE_ID=orion
fi
elif [[ -d /gpfs && -d /ncrc ]]; then
# We are on GAEA.
MACHINE_ID=gaea
elif [[ -d /gpfs/f5 && -d /ncrc ]]; then
# We are on GAEA C5.
MACHINE_ID=gaeac5
elif [[ -d /gpfs/f6 && -d /ncrc ]]; then
# We are on GAEA C6.
MACHINE_ID=gaeac6
elif [[ -d /data/prod ]]; then
# We are on SSEC's S4
MACHINE_ID=s4
Expand Down
File renamed without changes.
22 changes: 22 additions & 0 deletions tests/fv3_conf/compile_slurm.IN_gaeac6
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash -l
#SBATCH --error=err
#SBATCH --output=out
#SBATCH --account=@[ACCNR]
##SBATCH --qos=@[QUEUE]
#SBATCH --clusters=es
#SBATCH --partition=eslogin_c6
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --mem-per-cpu=4G
#SBATCH --time=180
#SBATCH --job-name="@[JBNME]"

# Compile job template for Gaea C6: runs compile.sh once and records the
# start/end epoch seconds of the job in job_timestamp.txt.

# Abort on any error or unset variable, and trace every command.
set -eux

# First field of job_timestamp.txt: start time (epoch seconds), no trailing newline.
echo -n " $(date +%s)," > job_timestamp.txt
echo "Compile started: " $(date)

# Arguments: machine id, make options, compile id, compiler.
@[PATHRT]/compile.sh @[MACHINE_ID] "@[MAKE_OPT]" @[COMPILE_ID] @[RT_COMPILER]

echo "Compile ended: " $(date)
# Second field of job_timestamp.txt: end time (epoch seconds), appended to the first.
echo -n " $(date +%s)," >> job_timestamp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export OMP_STACKSIZE=1024M
export NC_BLKSZ=1M
export ESMF_RUNTIME_PROFILE=ON
export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"
export FI_VERBS_PREFER_XRC=0

# Avoid job errors because of filesystem synchronization delays
sync && sleep 1
Expand Down
46 changes: 46 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_gaeac6
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash -l
#SBATCH -e err
#SBATCH -o out
#SBATCH --job-name="@[JBNME]"
#SBATCH --account=@[ACCNR]
#SBATCH --qos=@[QUEUE]
#SBATCH --clusters=c6
#SBATCH --partition=batch
#SBATCH --nodes=@[NODES]
#SBATCH --ntasks-per-node=@[TPN]
#SBATCH --time=@[WLCLK]

# Model run job template for Gaea C6: loads the fv3 modules from the run
# directory, launches fv3.exe with srun, and records the start/end epoch
# seconds of the job in job_timestamp.txt.

# Abort on any error or unset variable, and trace every command.
set -eux
# First field of job_timestamp.txt: start time (epoch seconds), no trailing newline.
echo -n " $(date +%s)," > job_timestamp.txt

# Tracing is switched off while the module environment is set up.
set +x
# MACHINE_ID is set before module-setup.sh is sourced, so that script can see it.
MACHINE_ID=gaeac6
source ./module-setup.sh
# Quoted so a run directory containing whitespace or glob characters is passed
# to "module use" as a single, literal path.
module use --prepend "${PWD}/modulefiles"
module load modules.fv3
module list
set -x

# $(date) is left unquoted on purpose: the printed line keeps the same
# single-space-separated form the backtick version produced.
echo "Model started: " $(date)

export OMP_NUM_THREADS=@[THRD]
export OMP_STACKSIZE=1024M
export NC_BLKSZ=1M
export ESMF_RUNTIME_PROFILE=ON
export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"
export FI_VERBS_PREFER_XRC=0

# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [[ "${JOB_SHOULD_FAIL:-NO}" == WHEN_RUNNING ]] ; then
  echo "The job should abort now, with exit status 1." >&2
  echo "If error checking is working, the metascheduler should mark the job as failed." >&2
  # With "set -e" active, this failing command terminates the script.
  false
fi

srun --label -n @[TASKS] ./fv3.exe

echo "Model ended: " $(date)
# Second field of job_timestamp.txt: end time (epoch seconds), appended to the first.
echo -n " $(date +%s)," >> job_timestamp.txt
File renamed without changes.
68 changes: 68 additions & 0 deletions tests/logs/RegressionTests_gaeac6.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
====START OF GAEAC6 REGRESSION TESTING LOG====

UFSWM hash used in testing:
2ccc549348da37aac51ab44482174dff2bb2912d

Submodule hashes used in testing:
37cbb7d6840ae7515a9a8f0dfd4d89461b3396d1 AQM (v0.2.0-37-g37cbb7d)
be5d28fd1b60522e6fc98aefeead20e6aac3530b AQM/src/model/CMAQ (CMAQv5.2.1_07Feb2018-198-gbe5d28fd1)
1f9eaaa142c8b07ed6b788c9f44ea02cc86d0bae CDEPS-interface/CDEPS (cdeps0.4.17-42-g1f9eaaa)
635d9a100a736bd8d14ad091e879d5da6e4eb2bd CICE-interface/CICE (CICE6.0.0-373-g635d9a1)
4c87095256c1c599c3ccaa857a95744158751a60 CICE-interface/CICE/icepack (Icepack1.1.0-191-g4c87095)
dc977bcadd1ade1a528dee75f1ad45e8bd80ca0a CMEPS-interface/CMEPS (cmeps_v0.4.1-2310-gdc977bc)
cabd7753ae17f7bfcc6dad56daf10868aa51c3f4 CMakeModules (v1.0.0-28-gcabd775)
a9364591091c836984a40107729720705847c195 FV3 (heads/develop)
ac3055eff06099d61cd65e18bc4f0353ffd83f46 FV3/atmos_cubed_sphere (201912_public_release-405-gac3055e)
0f8232724975c13289cad390c9a71fa2c6a9bff4 FV3/ccpp/framework (2024-07-11-dev)
b6c433354394bd8ed5e46692a81149441ff4ae38 FV3/ccpp/physics (EP4-873-gb6c43335)
74a0e098b2163425e4b5466c2dfcf8ae26d560a5 FV3/ccpp/physics/physics/Radiation/RRTMGP/rte-rrtmgp (v1.6)
81b38a88d860ce7e34e8507c2246151a54d96a39 FV3/upp (upp_v10.2.0-218-g81b38a88)
-1ba8270870947b583cd51bc72ff8960f4c1fb36e FV3/upp/sorc/libIFI.fd
-567edcc94bc418d0dcd6cdaafed448eeb5aab570 FV3/upp/sorc/ncep_post.fd/post_gtg.fd
041422934cae1570f2f0e67239d5d89f11c6e1b7 GOCART (sdr_v2.1.2.6-119-g0414229)
bcf7777bb037ae2feb2a8a8ac51aacb3511b52d9 HYCOM-interface/HYCOM (2.3.00-122-gbcf7777)
5e0c21f64fa5b20efc8f29f8709766e1e6793a79 MOM6-interface/MOM6 (dev/master/repository_split_2014.10.10-10230-g5e0c21f64)
9423197f894112edfcb1502245f7d7b873d551f9 MOM6-interface/MOM6/pkg/CVMix-src (9423197)
29e64d652786e1d076a05128c920f394202bfe10 MOM6-interface/MOM6/pkg/GSW-Fortran (29e64d6)
3ac32f0db7a2a97d930f44fa5f060c983ff31ee8 NOAHMP-interface/noahmp (v3.7.1-436-g3ac32f0)
7f548c795a348bbb0fe4967dd25692c79036dc73 WW3 (6.07.1-346-g7f548c79)
05cad173feeb598431e3ef5f17c2df6562c8d101 fire_behavior (v0.2.0-1-g05cad17)
fad2fe9f42f6b7f744b128b4a2a9433f91e4296f stochastic_physics (ufs-v2.0.0-219-gfad2fe9)


NOTES:
[Times](Memory) are at the end of each compile/test in format [MM:SS](Size).
The first time is for the full script (prep+run+finalize).
The second time is specifically for the run phase.
Times/Memory will be empty for failed tests.

BASELINE DIRECTORY: /gpfs/f6/drsa-fire2/world-shared/Brian.Curtis/NEMSfv3gfs/develop-20240909
COMPARISON DIRECTORY: /gpfs/f6/drsa-fire2/scratch/Brian.Curtis/RT_RUNDIRS/Brian.Curtis/FV3_RT/rt_2186049

RT.SH OPTIONS USED:
* (-a) - HPC PROJECT ACCOUNT: drsa-fire2
* (-c) - CREATE NEW BASELINES
* (-n) - RUN SINGLE TEST: cpld_control_p8
* (-e) - USE ECFLOW

PASS -- COMPILE 's2swa_intel' [09:12, 08:08] ( 6 warnings 10 remarks )
FAILED: TEST TIMED OUT -- TEST 'cpld_control_p8_intel' [, ]( MB)

SYNOPSIS:
Starting Date/Time: 20241002 11:56:43
Ending Date/Time: 20241002 13:19:28
Total Time: 01h:23m:40s
Compiles Completed: 1/1
Tests Completed: 0/1
Failed Tests:
* TEST cpld_control_p8_intel: FAILED: TEST TIMED OUT
-- LOG: /gpfs/f6/drsa-fire2/scratch/Brian.Curtis/RT_RUNDIRS/Brian.Curtis/FV3_RT/rt_2186049/cpld_control_p8_intel/err

NOTES:
A file 'test_changes.list' was generated with list of all failed tests.
You can use './rt.sh -c -b test_changes.list' to create baselines for the failed tests.
If you are using this log as a pull request verification, please commit 'test_changes.list'.

Result: FAILURE

====END OF GAEAC6 REGRESSION TESTING LOG====
Loading

0 comments on commit 46edda9

Please sign in to comment.