-
Notifications
You must be signed in to change notification settings - Fork 249
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[develop] Enable workflow runs on single node linux/mac machine using…
… rocoto. (#508) * Increase precision of degs_per_radian to 15 digits. * Use generic date util. * Add fake slurm commands for rocoto usage on linux. * Modify machine files for linux and mac. * Modify linux and macos wflow modules. * Fix unittest. * Remove openmpi module loading in linux/mac build modulefile. * Fix sacct. * Fix crontab unspecified USER issue. * Add EXTRN_MDL_DATA_STORES to macos. * Add more states to squeue/sacct. * Add a taskthrottle=1 option for linux/mac. * Don't specify number of processes for mpirun. * Get exit code directly instead of from log file. * Set taskthrottle to 1000 by default. * Fix linux lmod path bug. * Set stack size to unlimited for linux/mac. * Fix unittest.
- Loading branch information
1 parent
d3b10e6
commit 70da0e8
Showing
19 changed files
with
270 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,31 @@ | ||
platform: | ||
WORKFLOW_MANAGER: none | ||
WORKFLOW_MANAGER: rocoto | ||
NCORES_PER_NODE: 8 | ||
SCHED: none | ||
RUN_CMD_FCST: 'mpirun -n ${PE_MEMBER01} ' | ||
RUN_CMD_POST: 'mpirun -n 4 ' | ||
TASKTHROTTLE: 1 | ||
SCHED: slurm | ||
CCPA_OBS_DIR: /home/username/DATA/UFS/obs_data/ccpa/proc | ||
MRMS_OBS_DIR: /home/username/DATA/UFS/obs_data/mrms/proc | ||
NDAS_OBS_DIR: /home/username/DATA/UFS/obs_data/ndas/proc | ||
METPLUS_PATH: "" | ||
MET_BIN_EXEC: bin | ||
MET_INSTALL_DIR: "" | ||
DOMAIN_PREGEN_BASEDIR: /home/username/DATA/UFS/FV3LAM_pregen | ||
RUN_CMD_FCST: mpirun -n ${PE_MEMBER01} | ||
RUN_CMD_POST: mpirun | ||
RUN_CMD_SERIAL: time | ||
RUN_CMD_UTILS: mpirun -n 4 | ||
PRE_TASK_CMDS: '{ ulimit -a; }' | ||
RUN_CMD_UTILS: mpirun | ||
PRE_TASK_CMDS: '{ ulimit -a; ulimit -s unlimited; }' | ||
TEST_EXTRN_MDL_SOURCE_BASEDIR: /home/username/DATA/UFS/input_model_data | ||
TEST_PREGEN_BASEDIR: /home/username/DATA/UFS/FV3LAM_pregen | ||
TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /home/username/DATA/UFS/dummy_FV3GFS_sys_dir | ||
TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /home/username/DATA/UFS/dummy_FV3GFS_sys_dir | ||
FIXaer: /home/username/DATA/UFS/fix/fix_aer | ||
FIXgsm: /home/username/DATA/UFS/fix/fix_am | ||
FIXlut: /home/username/DATA/UFS/fix/fix_lut | ||
FIXorg: /home/username/DATA/UFS/fix/fix_orog | ||
FIXsfc: /home/username/DATA/UFS/fix/fix_sfc_climo | ||
FIXshp: /home/username/DATA/UFS/NaturalEarth | ||
EXTRN_MDL_DATA_STORES: aws nomads | ||
data: | ||
ics_lbcs: | ||
FV3GFS: /home/username/DATA/UFS/FV3GFS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,31 @@ | ||
platform: | ||
WORKFLOW_MANAGER: none | ||
WORKFLOW_MANAGER: rocoto | ||
NCORES_PER_NODE: 8 | ||
SCHED: none | ||
RUN_CMD_FCST: 'mpirun -n ${PE_MEMBER01} ' | ||
RUN_CMD_POST: 'mpirun -n 4 ' | ||
TASKTHROTTLE: 1 | ||
SCHED: slurm | ||
CCPA_OBS_DIR: /Users/username/DATA/UFS/obs_data/ccpa/proc | ||
MRMS_OBS_DIR: /Users/username/DATA/UFS/obs_data/mrms/proc | ||
NDAS_OBS_DIR: /Users/username/DATA/UFS/obs_data/ndas/proc | ||
DOMAIN_PREGEN_BASEDIR: /Users/username/DATA/UFS/FV3LAM_pregen | ||
METPLUS_PATH: "" | ||
MET_BIN_EXEC: bin | ||
MET_INSTALL_DIR: "" | ||
RUN_CMD_FCST: mpirun -n ${PE_MEMBER01} | ||
RUN_CMD_POST: mpirun | ||
RUN_CMD_SERIAL: time | ||
RUN_CMD_UTILS: mpirun -n 4 | ||
PRE_TASK_CMDS: '{ ulimit -a; }' | ||
RUN_CMD_UTILS: mpirun | ||
PRE_TASK_CMDS: '{ ulimit -a; ulimit -s unlimited; }' | ||
TEST_EXTRN_MDL_SOURCE_BASEDIR: /Users/username/DATA/UFS/input_model_data | ||
TEST_PREGEN_BASEDIR: /Users/username/DATA/UFS/FV3LAM_pregen | ||
TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /Users/username/DATA/UFS/dummy_FV3GFS_sys_dir | ||
TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /Users/username/DATA/UFS/dummy_FV3GFS_sys_dir | ||
FIXaer: /Users/username/DATA/UFS/fix/fix_aer | ||
FIXgsm: /Users/username/DATA/UFS/fix/fix_am | ||
FIXlut: /Users/username/DATA/UFS/fix/fix_lut | ||
FIXorg: /Users/username/DATA/UFS/fix/fix_orog | ||
FIXsfc: /Users/username/DATA/UFS/fix/fix_sfc_climo | ||
FIXshp: /Users/username/DATA/UFS/NaturalEarth | ||
EXTRN_MDL_DATA_STORES: aws nomads | ||
data: | ||
ics_lbcs: | ||
FV3GFS: /Users/username/DATA/UFS/FV3GFS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/bin/bash

# Emulates slurm's sacct.
#
# Job information is read from the .job_database file in the current
# directory, whose records look like:
#   <name> pid <pid> submitted <time>
#   <name> pid <pid> started <time> ends <time>
#   <name> pid <pid> ended <time> exitcode <code>
#
# Usage: sacct [--jobs=PID[,PID...]]
#   When the first argument starts with --jobs= only the listed PIDs are
#   reported; otherwise one PID per distinct job name in .job_database is.

SACCT_JOB_DB=".job_database"

#######################################
# Print the PIDs to report on, separated by whitespace.
# Arguments: $1 - first command line argument (may be empty)
#######################################
sacct_select_pids() {
  if [[ "$1" == "--jobs="* ]]; then
    local list=${1#--jobs=}
    printf '%s\n' "${list//,/ }"
  elif [[ -f "$SACCT_JOB_DB" ]]; then
    awk '$4 == "submitted"' "$SACCT_JOB_DB" | sort -u -k1,1 | awk '{ print $3 }'
  fi
}

#######################################
# Summarise the records of one PID.
# Arguments: $1 - PID to look up
# Outputs:   "<name> <submit> <start> <end> <exitcode> <state>" on stdout, or
#            nothing when the PID has no "submitted" record (or there is no
#            database file).
#######################################
sacct_job_summary() {
  [[ -r "$SACCT_JOB_DB" ]] || return 0
  awk -v pid="$1" '
    function nz(v) { return (v == "") ? "N/A" : v }
    $2 != "pid" || $3 != pid { next }
    # A submitted/started record that follows an ended one means the PID was
    # reused by a later job: forget the finished job and start over.
    ($4 == "submitted" || $4 == "started") && ended {
      submitted = started = ended = 0
      name = t_sub = t_start = t_end = code = ""
    }
    $4 == "submitted" { submitted = 1; name = $1; t_sub = $5 }
    $4 == "started"   { started = 1; t_start = $5; t_end = $7 }
    $4 == "ended"     { ended = 1; t_end = $5; code = $7 }
    END {
      if (!submitted) exit
      state = ended ? "COMPLETED" : (started ? "RUNNING" : "PENDING")
      printf "%s %s %s %s %s %s\n", name, nz(t_sub), nz(t_start), nz(t_end),
             (code == "" ? 0 : code), state
    }
  ' "$SACCT_JOB_DB"
}

#######################################
# Entry point: print the sacct-style report.
# Arguments: same as the script
#######################################
sacct_main() {
  local pids pid summary name t_sub t_start t_end exitc state
  local user=${USER:-user}

  pids=$(sacct_select_pids "$1")

  # Output info the way rocoto calls sacct
  echo "JobID|User|JobName|Partition|Priority|Submit|Start|End|NCPUS|ExitCode|State"

  # Unquoted on purpose: the PID list is split on whitespace.
  for pid in ${pids}; do
    # Defaults for a PID that has no "submitted" record.
    name=$pid
    t_sub="N/A"
    t_start="N/A"
    t_end="N/A"
    exitc=0
    state="UNKNOWN"

    summary=$(sacct_job_summary "$pid")
    if [[ -n "$summary" ]]; then
      read -r name t_sub t_start t_end exitc state <<< "$summary"
    fi

    # Every argument is quoted so that no field can vanish or split and
    # shift the remaining columns.
    printf '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
      "$pid" "${user:0:30}" "${name:0:30}" linux 0.1 \
      "$t_sub" "$t_start" "$t_end" 1 "$exitc" "$state"
  done
}

sacct_main "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash

# Emulates slurm's sbatch.
#
# Usage: sbatch [SCRIPT]      (SCRIPT defaults to standard input)
#
# The script is run in the background by bash. The directives
#   #SBATCH -o <file>           combined stdout/stderr of the job
#   #SBATCH -t <time limit>     wall clock limit, enforced with timeout(1)
#   #SBATCH --job-name=<name>   (or "--job-name <name>")
# are honoured. Three records are appended to .job_database in the current
# directory:
#   <name> pid <pid> submitted <time>
#   <name> pid <pid> started <time> ends <time>
#   <name> pid <pid> ended <time> exitcode <code>
# and "Submitted batch job <pid>" is printed on stdout.

#######################################
# Convert a slurm time limit to seconds.
# Arguments: $1 - minutes, minutes:seconds, hours:minutes:seconds,
#                 days-hours, days-hours:minutes or days-hours:minutes:seconds
# Outputs:   the number of seconds on stdout
# Returns:   1, printing nothing, when $1 has none of those forms
#######################################
sbatch_walltime_to_seconds() {
  local spec=$1 days=0 hours=0 mins=0 secs=0
  local re='^([0-9]+-)?[0-9]+(:[0-9]+){0,2}$'
  local -a parts

  [[ "$spec" =~ $re ]] || return 1

  if [[ "$spec" == *-* ]]; then
    days=${spec%%-*}
    IFS=: read -r hours mins secs <<< "${spec#*-}"
  else
    IFS=: read -r -a parts <<< "$spec"
    case ${#parts[@]} in
      1) mins=${parts[0]} ;;
      2) mins=${parts[0]} secs=${parts[1]} ;;
      *) hours=${parts[0]} mins=${parts[1]} secs=${parts[2]} ;;
    esac
  fi

  # 10# forces base 10 so zero-padded fields such as "08" are not read as octal.
  echo $(( 10#${days:-0} * 86400 + 10#${hours:-0} * 3600 \
         + 10#${mins:-0} * 60 + 10#${secs:-0} ))
}

#######################################
# Entry point: submit one job script.
# Arguments: $1 - path of the job script (default: /dev/stdin)
# Outputs:   "Submitted batch job <pid>" on stdout
# Returns:   1 when the script cannot be read or is empty
#######################################
sbatch_main() {
  local src=${1:-/dev/stdin}
  local script log limit secs jobname pid

  # Read the script exactly once: when it arrives on a pipe it cannot be
  # re-read for each directive and again for the job body.
  script=$(cat -- "$src") || return 1
  if [[ -z "${script//[[:space:]]/}" ]]; then
    echo "sbatch: error: Batch script is empty!" >&2
    return 1
  fi

  #parse log file
  log=$(awk '$1 == "#SBATCH" && $2 == "-o" { print $3; exit }' <<< "$script")
  log=${log:-/dev/null}

  #parse time
  limit=$(awk '$1 == "#SBATCH" && $2 == "-t" { print $3; exit }' <<< "$script")
  secs=$(sbatch_walltime_to_seconds "$limit")   # stays empty when there is no usable limit

  #parse job name
  jobname=$(awk '
    $1 != "#SBATCH" { next }
    $2 == "--job-name" { print $3; exit }
    index($2, "--job-name=") == 1 { print substr($2, 12); exit }
  ' <<< "$script")
  jobname=${jobname:-default}

  #execute job in background
  # - Values are handed over as positional parameters instead of being pasted
  #   into the command string, so nothing in them is re-parsed as shell code.
  # - The job runs in its own bash: the time limit covers the whole script,
  #   all of its output goes to the log, and an "exit" inside the job cannot
  #   skip the "ended" record.
  # - stdin/stdout/stderr are detached so a caller capturing our output is
  #   not kept waiting until the job finishes.
  bash -c '
    jobname=$1 log=$2 secs=$3 script=$4
    stamp() { date -u "$@" +%Y-%m-%d:%H:%M:%S; }

    started=$(stamp)
    deadline=$(stamp -d "${secs:-0} sec" 2>/dev/null)   # -d needs GNU date
    echo "$jobname pid $$ started $started ends ${deadline:-N/A}" >>.job_database

    if [ -n "$secs" ] && command -v timeout >/dev/null 2>&1; then
      timeout "${secs}s" bash -c "$script" >"$log" 2>&1
    else
      bash -c "$script" >"$log" 2>&1
    fi
    excode=$?

    echo "$jobname pid $$ ended $(stamp) exitcode $excode" >>.job_database
  ' sbatch-job "$jobname" "$log" "$secs" "$script" </dev/null >/dev/null 2>&1 &

  #submission info
  pid=$!
  echo "$jobname pid $pid submitted $(date -u +%Y-%m-%d:%H:%M:%S)" >>.job_database
  echo "Submitted batch job $pid"
}

sbatch_main "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Emulates slurm's scancel
#
# Usage: scancel JOBID...
#   Each JOBID is the PID of a background job. The job's process group is
#   killed with SIGKILL; when no such process group exists, the direct
#   children of the PID and the PID itself are killed instead.

#######################################
# Entry point: kill every job given on the command line.
# Arguments: one or more job ids (PIDs greater than 1)
# Returns:   2 on a missing or invalid job id, otherwise the status of the
#            last kill
#######################################
scancel_main() {
  local jobid status=0

  if (( $# == 0 )); then
    echo "usage: scancel JOBID..." >&2
    return 2
  fi

  for jobid in "$@"; do
    # Only plain PIDs above 1 are accepted: an empty, zero or "1" argument
    # would turn the group kill below into "kill -9 -", "kill -9 -0" or
    # "kill -9 -1", the last two of which signal far more than one job.
    if [[ ! "$jobid" =~ ^[1-9][0-9]*$ || "$jobid" == 1 ]]; then
      echo "scancel: invalid job id '$jobid'" >&2
      return 2
    fi

    # Preferred: the whole process group led by the job.
    if kill -9 -- "-${jobid}" 2>/dev/null; then
      continue
    fi

    # The job is not a process group leader: kill its direct children
    # (best effort, there may be none) and then the job itself.
    if command -v pkill >/dev/null 2>&1; then
      pkill -9 -P "$jobid" 2>/dev/null
    fi
    kill -9 -- "$jobid" || status=$?
  done

  return "$status"
}

scancel_main "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash

# Emulates slurm's sinfo
#
# Prints CPU information for the local machine: the output of lscpu where
# that tool exists and works, otherwise a single "CPU(s): N" line obtained
# from sysctl (hw.ncpu) or getconf (_NPROCESSORS_ONLN).

#######################################
# Entry point: print CPU information on stdout.
# Returns: 1 when no source of CPU information is available
#######################################
sinfo_main() {
  local report ncpu

  if command -v lscpu >/dev/null 2>&1; then
    report=$(lscpu 2>/dev/null)
    if [[ -n "$report" ]]; then
      printf '%s\n' "$report"
      return 0
    fi
  fi

  # lscpu is missing or produced nothing: fall back to a CPU count.
  ncpu=$(sysctl -n hw.ncpu 2>/dev/null) \
    || ncpu=$(getconf _NPROCESSORS_ONLN 2>/dev/null) \
    || {
      echo "sinfo: cannot determine CPU information" >&2
      return 1
    }
  printf 'CPU(s): %s\n' "$ncpu"
}

sinfo_main "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/bin/bash

# Emulates slurm's squeue.
#
# Job information is read from the .job_database file in the current
# directory, whose records look like:
#   <name> pid <pid> submitted <time>
#   <name> pid <pid> started <time> ends <time>
#   <name> pid <pid> ended <time> exitcode <code>
#
# Usage: squeue [--jobs=PID[,PID...]]
#   When the first argument starts with --jobs= only the listed PIDs are
#   reported; otherwise one PID per distinct job name in .job_database is.

SQUEUE_JOB_DB=".job_database"

# Fixed-width columns, in the order rocoto reads them.
SQUEUE_FMT="%-40s%-40s%-10s%-20s%-30s%-30s%-30s%-30s%-10s%-30s%-200s\n"

#######################################
# Print the PIDs to report on, separated by whitespace.
# Arguments: $1 - first command line argument (may be empty)
#######################################
squeue_select_pids() {
  if [[ "$1" == "--jobs="* ]]; then
    local list=${1#--jobs=}
    printf '%s\n' "${list//,/ }"
  elif [[ -f "$SQUEUE_JOB_DB" ]]; then
    awk '$4 == "submitted"' "$SQUEUE_JOB_DB" | sort -u -k1,1 | awk '{ print $3 }'
  fi
}

#######################################
# Summarise the records of one PID.
# Arguments: $1 - PID to look up
# Outputs:   "<name> <submit> <start> <end> <exitcode> <state>" on stdout, or
#            nothing when the PID has no "submitted" record (or there is no
#            database file).
#######################################
squeue_job_summary() {
  [[ -r "$SQUEUE_JOB_DB" ]] || return 0
  awk -v pid="$1" '
    function nz(v) { return (v == "") ? "N/A" : v }
    $2 != "pid" || $3 != pid { next }
    # A submitted/started record that follows an ended one means the PID was
    # reused by a later job: forget the finished job and start over.
    ($4 == "submitted" || $4 == "started") && ended {
      submitted = started = ended = 0
      name = t_sub = t_start = t_end = code = ""
    }
    $4 == "submitted" { submitted = 1; name = $1; t_sub = $5 }
    $4 == "started"   { started = 1; t_start = $5; t_end = $7 }
    $4 == "ended"     { ended = 1; t_end = $5; code = $7 }
    END {
      if (!submitted) exit
      state = ended ? "COMPLETED" : (started ? "RUNNING" : "PENDING")
      printf "%s %s %s %s %s %s\n", name, nz(t_sub), nz(t_start), nz(t_end),
             (code == "" ? 0 : code), state
    }
  ' "$SQUEUE_JOB_DB"
}

#######################################
# Entry point: print the squeue-style report.
# Arguments: same as the script
#######################################
squeue_main() {
  local pids pid summary name t_sub t_start t_end exitc state
  local user=${USER:-user}

  pids=$(squeue_select_pids "$1")

  # Output info the way rocoto calls squeue
  printf "$SQUEUE_FMT" JOBID USER CPUS PARTITION SUBMIT_TIME START_TIME \
    END_TIME PRIORITY EXIT_CODE STATE NAME

  # Unquoted on purpose: the PID list is split on whitespace.
  for pid in ${pids}; do
    # Defaults for a PID that has no "submitted" record.
    name=$pid
    t_sub="N/A"
    t_start="N/A"
    t_end="N/A"
    exitc=0
    state="UNKNOWN"

    summary=$(squeue_job_summary "$pid")
    if [[ -n "$summary" ]]; then
      read -r name t_sub t_start t_end exitc state <<< "$summary"
    fi

    # Every argument is quoted: with fixed-width columns a vanished or split
    # field would shift all the columns after it.
    printf "$SQUEUE_FMT" "$pid" "$user" 1 linux "$t_sub" "$t_start" "$t_end" \
      0.1 "$exitc" "$state" "$name"
  done
}

squeue_main "$@"
Oops, something went wrong.