Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RT updates: remove compiler from labels #129

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 19 additions & 22 deletions tests/auto/jobs/bl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ def run(job_obj):
new_baseline, blstore = set_directories(job_obj)
pr_repo_loc, repo_dir_str = clone_pr_repo(job_obj)
bldate = get_bl_date(job_obj, pr_repo_loc)
bldir = f'{blstore}/main-{bldate}/{job_obj.compiler.upper()}'
bldirbool = check_for_bl_dir(bldir, job_obj)
bldir = f'{blstore}/main-{bldate}'
run_regression_test(job_obj, pr_repo_loc)
post_process(job_obj, pr_repo_loc, repo_dir_str, new_baseline, bldir, bldate, blstore)

Expand All @@ -26,29 +25,25 @@ def set_directories(job_obj):
rt_dir = '/scratch1/NCEPDEV/nems/emc.nemspara/'
workdir = f'{rt_dir}/autort/pr'
blstore = f'{rt_dir}/RT/NEMSfv3gfs'
new_baseline = f'{rt_dir}/FV3_RT/'\
f'REGRESSION_TEST_{job_obj.compiler.upper()}'
new_baseline = f'{rt_dir}/FV3_RT/REGRESSION_TEST'
elif machine == 'jet':
rt_dir = '/lfs4/HFIP/h-nems/emc.nemspara/'
workdir = f'{rt_dir}/autort/pr'
blstore = f'{rt_dir}/RT/NEMSfv3gfs'
new_baseline = '{rt_dir}/RT_BASELINE/'\
f'emc.nemspara/FV3_RT/REGRESSION_TEST_{job_obj.compiler.upper()}'
new_baseline = f'{rt_dir}/RT_BASELINE/'\
'emc.nemspara/FV3_RT/REGRESSION_TEST'
elif machine == 'gaea':
workdir = '/lustre/f2/pdata/ncep/emc.nemspara/autort/pr'
blstore = '/lustre/f2/pdata/ncep_shared/emc.nemspara/RT/NEMSfv3gfs'
new_baseline = '/lustre/f2/scratch/emc.nemspara/FV3_RT/'\
f'REGRESSION_TEST_{job_obj.compiler.upper()}'
new_baseline = '/lustre/f2/scratch/emc.nemspara/FV3_RT/REGRESSION_TEST'
elif machine == 'orion':
workdir = '/work/noaa/nems/emc.nemspara/autort/pr'
blstore = '/work/noaa/nems/emc.nemspara/RT/NEMSfv3gfs'
new_baseline = '/work/noaa/stmp/bcurtis/stmp/bcurtis/FV3_RT/'\
f'REGRESSION_TEST_{job_obj.compiler.upper()}'
new_baseline = '/work/noaa/stmp/bcurtis/stmp/bcurtis/FV3_RT/REGRESSION_TEST'
elif machine == 'cheyenne':
workdir = '/glade/scratch/dtcufsrt/autort/tests/auto/pr'
blstore = '/glade/p/ral/jntp/GMTB/ufs-weather-model/RT/NEMSfv3gfs'
new_baseline = '/glade/scratch/dtcufsrt/FV3_RT/'\
f'REGRESSION_TEST_{job_obj.compiler.upper()}'
new_baseline = '/glade/scratch/dtcufsrt/FV3_RT/REGRESSION_TEST'

if not job_obj.clargs.workdir:
job_obj.workdir = workdir
Expand All @@ -73,7 +68,8 @@ def set_directories(job_obj):

def check_for_bl_dir(bldir, job_obj):
logger = logging.getLogger('BL/CHECK_FOR_BL_DIR')
logger.info('Checking if baseline directory exists')
logger.info('Checking if baseline directory exists:')
logger.info(bldir)
if os.path.exists(bldir):
logger.critical(f'Baseline dir: {bldir} exists. It should not, yet.')
job_obj.comment_text_append(f'[BL] ERROR: Baseline location exists before '
Expand All @@ -95,14 +91,11 @@ def run_regression_test(job_obj, pr_repo_loc):
logger = logging.getLogger('BL/RUN_REGRESSION_TEST')

rt_command = 'cd tests'
rt_command += f' && export RT_COMPILER="{job_obj.compiler}"'
if job_obj.workdir:
rt_command += f' && export RUNDIR_ROOT={job_obj.workdir}'
if job_obj.clargs.new_baseline:
rt_command += f' && export NEW_BASELINE={job_obj.clargs.new_baseline}'
rt_command += f' && /bin/bash --login ./rt.sh -e -a {job_obj.clargs.account} -c -p {job_obj.clargs.machine} -n control_p8 intel'
if job_obj.compiler == 'gnu':
rt_command += f' -l rt_gnu.conf'
rt_command += f' && /bin/bash --login ./rt.sh -e -a {job_obj.clargs.account} -c -p {job_obj.clargs.machine}'
if job_obj.clargs.envfile:
rt_command += f' -s {job_obj.clargs.envfile}'
rt_command += f' {job_obj.clargs.additional_args}'
Expand Down Expand Up @@ -155,13 +148,17 @@ def post_process(job_obj, pr_repo_loc, repo_dir_str, new_baseline, bldir, bldate
filepath = f'{pr_repo_loc}/{rt_log}'
rt_dir, logfile_pass = process_logfile(job_obj, filepath)
if logfile_pass:
create_bl_dir(bldir, job_obj)
move_bl_command = [[f'mv {new_baseline}/* {bldir}/', pr_repo_loc]]
job_obj.run_commands(logger, move_bl_command)
job_obj.comment_text_append('[BL] Baseline creation and move successful')
job_obj.comment_text_append(f'***Baseline creation successful on {job_obj.clargs.machine}***')
logger.info('Starting RT Job')
# Update baseline to newly created baseline, then run new test
logging.info(f"{job_obj.baseline=}")
logging.info(f"{job_obj.clargs.new_baseline=}")
job_obj.baseline = job_obj.clargs.new_baseline
rt.run(job_obj)
logger.info('Finished with RT Job')
else:
logger.critical(f'Baseline created but RT failed, see log files for details')
job_obj.job_failed(logger, f'{job_obj.preq_dict["action"]}')


def get_bl_date(job_obj, pr_repo_loc):
Expand Down Expand Up @@ -210,6 +207,6 @@ def process_logfile(job_obj, logfile):
logger.critical(f'Log file exists but is not complete')
job_obj.job_failed(logger, f'{job_obj.preq_dict["action"]}')
else:
logger.critical(f'Could not find {job_obj.clargs.machine}.{job_obj.compiler} '
logger.critical(f'Could not find {job_obj.clargs.machine} '
f'{job_obj.preq_dict["action"]} log: {logfile}')
raise FileNotFoundError
18 changes: 9 additions & 9 deletions tests/auto/jobs/rt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@ def run_regression_test(job_obj, pr_repo_loc):
logger = logging.getLogger('RT/RUN_REGRESSION_TEST')

rt_command = 'cd tests'
rt_command += f' && export RT_COMPILER="{job_obj.compiler}"'
if job_obj.workdir:
rt_command += f' && export RUNDIR_ROOT={job_obj.workdir}'
if job_obj.baseline:
rt_command += f' && export RTPWD={job_obj.baseline}'
rt_command += f' && /bin/bash --login ./rt.sh -e -a {job_obj.clargs.account} -p {job_obj.clargs.machine}'
if job_obj.compiler == 'gnu':
rt_command += f' -l rt_gnu.conf'
if job_obj.clargs.envfile:
rt_command += f' -s {job_obj.clargs.envfile}'
rt_command += f' {job_obj.clargs.additional_args}'
Expand Down Expand Up @@ -79,14 +78,16 @@ def post_process(job_obj, pr_repo_loc, repo_dir_str, branch):
move_rt_commands = [
[f'git pull --ff-only origin {branch}', pr_repo_loc],
[f'git add {rt_log}', pr_repo_loc],
[f'git commit -m "[AutoRT] {job_obj.clargs.machine}'
f'.{job_obj.compiler} Job Completed.\n\n\n'
[f'git commit -m "[AutoRT] {job_obj.clargs.machine} Job Completed.\n\n\n'
f'on-behalf-of {job_obj.gitargs["github"]["org"]} @{job_obj.gitargs["config"]["user.name"]}"',
pr_repo_loc],
['sleep 10', pr_repo_loc],
[f'git push origin {branch}', pr_repo_loc]
]
job_obj.run_commands(logger, move_rt_commands)
# job_obj.run_commands(logger, move_rt_commands)
job_obj.comment_text_pop()
job_obj.comment_text_append(f'***Regression test successful on {job_obj.clargs.machine}!***')
job_obj.preq_dict['preq'].create_issue_comment(job_obj.comment_text)
else:
job_obj.comment_text_append(f'[RT] Log file shows failures.')
job_obj.comment_text_append(f'[RT] Please obtain logs from {pr_repo_loc}')
Expand All @@ -110,9 +111,8 @@ def process_logfile(job_obj, logfile):
job_obj.job_failed(logger, f'{job_obj.preq_dict["action"]}')
return rt_dir, False
else:
logger.critical(f'Could not find {job_obj.clargs.machine}'
f'.{job_obj.compiler} '
logger.critical(f'Could not find {job_obj.clargs.machine} '
f'{job_obj.preq_dict["action"]} log:\n{logfile}')
print(f'Could not find {job_obj.clargs.machine}.{job_obj.compiler} '
print(f'Could not find {job_obj.clargs.machine} '
f'{job_obj.preq_dict["action"]} log:\n{logfile}')
raise FileNotFoundError
45 changes: 24 additions & 21 deletions tests/auto/rt_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,24 @@ def __init__(self):

def set_action_from_label(machine, actions, label):
''' Match the label that initiates a job with an action in the dict'''
# <machine>-<compiler>-<test> i.e. hera-gnu-RT
# <machine>-<test> e.g. hera-RT
logger = logging.getLogger('MATCH_LABEL_WITH_ACTIONS')
logger.info('Setting action from Label')
split_label = label.name.split('-')
# Make sure it has three parts
if len(split_label) != 3:
# Make sure it has two parts
if len(split_label) != 2:
return False, False
# Break the parts into their variables
label_machine = split_label[0]
label_compiler = split_label[1]
label_action = split_label[2]
label_action = split_label[1]
# check machine name matches
if not re.match(label_machine, machine):
return False, False
# Compiler must be intel or gnu
if not str(label_compiler) in ["intel", "gnu"]:
return False, False
action_match = next((action for action in actions
if re.match(action, label_action)), False)

logging.info(f'Compiler: {label_compiler}, Action: {action_match}')
return label_compiler, action_match
logging.info(f'Action: {action_match}')
return action_match

def delete_pr_dirs(each_pr, machine, workdir):
ids = [str(pr.id) for pr in each_pr]
Expand Down Expand Up @@ -134,19 +130,18 @@ def get_preqs_with_actions(repos, args, ghinterface_obj, actions, git_cfg):
.get_pulls(state='open', sort='created', base=repo['base'])
for repo in repos]
each_pr = [preq for gh_preq in gh_preqs for preq in gh_preq]
delete_pr_dirs(each_pr, args.machine, args.workdir)
# delete_pr_dirs(each_pr, args.machine, args.workdir)
preq_labels = [{'preq': pr, 'label': label} for pr in each_pr
for label in pr.get_labels()]

jobs = []
# return_preq = []
for pr_label in preq_labels:
compiler, match = set_action_from_label(args.machine, actions,
pr_label['label'])
match = set_action_from_label(args.machine, actions, pr_label['label'])
if match:
pr_label['action'] = match
# return_preq.append(pr_label.copy())
jobs.append(Job(pr_label.copy(), ghinterface_obj, args, compiler, git_cfg))
jobs.append(Job(pr_label.copy(), ghinterface_obj, args, git_cfg))

return jobs

Expand All @@ -168,22 +163,28 @@ class Job:
provided by the bash script
'''

def __init__(self, preq_dict, ghinterface_obj, args, compiler, gitargs):
def __init__(self, preq_dict, ghinterface_obj, args, gitargs):
self.logger = logging.getLogger('JOB')
self.preq_dict = preq_dict
self.job_mod = importlib.import_module(
f'jobs.{self.preq_dict["action"].lower()}')
self.ghinterface_obj = ghinterface_obj
self.clargs = args
self.compiler = compiler
self.gitargs = gitargs
self.comment_text = '***Automated RT Failure Notification***\n'
self.failed_tests = []
self.workdir = args.workdir
self.baseline = args.baseline

def comment_text_append(self, newtext):
    ''' Append one line to the accumulated comment text.

    newtext is formatted into the comment followed by a single newline,
    so every appended entry ends up on its own line.
    '''
    self.comment_text += f'{newtext}\n'

def comment_text_pop(self, position=0):
    ''' Remove one line from the accumulated comment text.

    The comment text is split on newlines, the entry at index
    ``position`` (default: the first line) is dropped, and the remaining
    lines are joined back together with newlines.

    Raises IndexError if ``position`` is outside the range of lines.
    '''
    lines = self.comment_text.split('\n')
    # list.pop raises IndexError for an out-of-range position; let it
    # propagate so callers notice a bad index instead of silently no-op'ing.
    lines.pop(position)
    self.comment_text = '\n'.join(lines)

def remove_pr_label(self):
''' Removes the PR label that initiated the job run from PR '''
self.logger.info(f'Removing Label: {self.preq_dict["label"]}')
Expand All @@ -193,7 +194,6 @@ def check_label_before_job_start(self):
# Let's check that the label still exists before the start of the job,
# in the case of multiple jobs
label_to_check = f'{self.clargs.machine}'\
f'-{self.compiler}'\
f'-{self.preq_dict["action"]}'
labels = self.preq_dict['preq'].get_labels()
label_match = next((label for label in labels
Expand All @@ -215,7 +215,11 @@ def run_commands(self, logger, commands_with_cwd):
try:
out, err = output.communicate()
out = [] if not out else out.decode('utf8').split('\n')
logger.info(out)
if isinstance(out, str)
logger.info(out)
else:
for o in out:
logger.info(out[o])
except Exception as e:
err = [] if not err else err.decode('utf8').split('\n')
self.job_failed(logger, f'Command {command}', exception=e,
Expand All @@ -227,7 +231,6 @@ def run(self):
logger = logging.getLogger('JOB/RUN')
logger.info(f'Starting Job: {self.preq_dict["label"]}')
self.comment_text_append(newtext=f'Machine: {self.clargs.machine}')
self.comment_text_append(f'Compiler: {self.compiler}')
self.comment_text_append(f'Job: {self.preq_dict["action"]}')
if self.check_label_before_job_start():
try:
Expand All @@ -249,7 +252,6 @@ def send_comment_text(self):
self.comment_text_append('Please make changes and add '
'the following label back: '
f'{self.clargs.machine}'
f'-{self.compiler}'
f'-{self.preq_dict["action"]}')

self.preq_dict['preq'].create_issue_comment(self.comment_text)
Expand All @@ -259,7 +261,8 @@ def job_failed(self, logger, job_name, exception=None, STDOUT=False,
logger.critical(f'{job_name} FAILED.')

if STDOUT:
logger.critical(f'STDOUT: {[item for item in out if not None]}')
for o in out:
logger.critical(f'STDOUT: {o}')
logger.critical(f'STDERR: {[eitem for eitem in err if not None]}')
# if exception is not None:
# raise
Expand Down