Skip to content

Commit

Permalink
Enable AWS Parallel Works platform and Add Comprehensive End-To-End T…
Browse files Browse the repository at this point in the history
…ests (#333)

* Normalize Parallel Works cluster platform value

Set the value of platform to 'noaacloud' when SRW_PLATFORM matches a
Parallel Works cluster name.

* Enable the AWS Parallel Works platform

* Move agent declaration to stages

This change allows the platform filter to work correctly. Otherwise, the
Parallel Works clusters would block indefinitely waiting to execute the
matrix on an agent/node that was not started.

* Ensure PROJ_LIB is set on Parallel Works platforms

* Fix final exit status of srw_test script

Some platforms do not recognize quoted variables within an arithmetic
expression. This change removes the quotes.

* Add comprehensive end-to-end tests option

* Add a parameter to the Jenkins pipeline that allows the comprehensive
set of workflow and end-to-end tests to be executed during the test
stage.
* Add logic to the Jenkins pipeline that checks for a specific Pull
Request label and, if the label is present, overrides the value of the
comprehensive end-to-end test parameter.

* Clean up the workspace after a we2e test run

The experiments directory uses a lot of disk space. Removing it after
the end-to-end tests complete will allow us to keep the workspaces
longer. However, the test logs should be preserved. This change creates
a tarball containing the test logs in the workspace, which is archived,
then removes the experiments directory.

* Disable concurrent builds for branches and PRs

Prevent Jenkins from executing multiple pipelines at the same time for a
given branch or change request.

* Disable branch indexing triggers for pipeline

* Update we2e fundamental tests for srw_test script

* Log and archive the output of the srw build

* Update we2e comprehensive tests in srw_test script

* Update we2e fundamental/comprehensive tests

* Update we2e comprehensive tests

* Remove invalid tests from comprehensive list

* Update Parallel Works cluster names

* Remove regional_workflow from we2e_test_dir path
  • Loading branch information
Jesse McFarland authored Sep 29, 2022
1 parent e154110 commit dd0677b
Show file tree
Hide file tree
Showing 3 changed files with 288 additions and 21 deletions.
70 changes: 53 additions & 17 deletions .cicd/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,21 @@ pipeline {
agent none

options {
disableConcurrentBuilds()
overrideIndexTriggers(false)
skipDefaultCheckout(true)
}

parameters {
// Allow job runner to filter based on platform
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pcluster_noaa_v2_use1', 'azcluster_noaa', 'gcluster_noaa_v2_usc1'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
// Allow job runner to filter based on compiler
choice(name: 'SRW_COMPILER_FILTER', choices: ['all', 'gnu', 'intel'], description: 'Specify the compiler(s) to use to build')
booleanParam name: 'SRW_WE2E_COMPREHENSIVE_TESTS', defaultValue: false, description: 'Whether to execute the comprehensive end-to-end tests'
}

stages {
/*
// Start the NOAA Parallel Works clusters, if necessary
stage('Start Parallel Works Clusters') {
matrix {
Expand All @@ -29,7 +31,7 @@ pipeline {
axes {
axis {
name 'SRW_PLATFORM'
values 'pcluster_noaa_v2_use1', 'azcluster_noaa', 'gcluster_noaa_v2_usc1'
values 'pclusternoaav2use1' //, 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}
}

Expand All @@ -44,7 +46,6 @@ pipeline {
}
}
}
*/

// Build and test the SRW application on all supported platforms using the supported compilers for each platform
stage('Build and Test') {
Expand All @@ -68,8 +69,7 @@ pipeline {
axes {
axis {
name 'SRW_PLATFORM'
// values 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pcluster_noaa_v2_use1', 'azcluster_noaa', 'gcluster_noaa_v2_usc1'
values 'cheyenne', 'gaea', 'hera', 'jet', 'orion'
values 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1' //, 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand All @@ -83,7 +83,7 @@ pipeline {
exclude {
axis {
name 'SRW_PLATFORM'
values 'gaea', 'hera', 'jet', 'orion'
values 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1' //, 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand All @@ -93,10 +93,6 @@ pipeline {
}
}

agent {
label env.SRW_PLATFORM
}

environment {
BRANCH_NAME_ESCAPED = env.BRANCH_NAME.replace('/', '_')
BUILD_VERSION = "${env.SRW_PLATFORM}-${env.SRW_COMPILER}-${env.BRANCH_NAME_ESCAPED}-${env.BUILD_NUMBER}"
Expand All @@ -106,6 +102,10 @@ pipeline {
stages {
// Clean the workspace, checkout the repository, and run checkout_externals
stage('Initialize') {
agent {
label env.SRW_PLATFORM
}

steps {
echo "Initializing SRW (${env.SRW_COMPILER}) build environment on ${env.SRW_PLATFORM}"
cleanWs()
Expand All @@ -116,6 +116,10 @@ pipeline {

// Run the unified build script; if successful create a tarball of the build and upload to S3
stage('Build') {
agent {
label env.SRW_PLATFORM
}

steps {
echo "Building SRW (${env.SRW_COMPILER}) on ${env.SRW_PLATFORM}"
sh 'bash --login "${WORKSPACE}/.cicd/scripts/srw_build.sh"'
Expand All @@ -124,29 +128,62 @@ pipeline {
post {
success {
sh 'tar --create --gzip --verbose --file "${WORKSPACE}/${BUILD_NAME}.tgz" bin include lib share'
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: true, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.BUILD_NAME}.tgz", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: true, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.BUILD_NAME}.tgz", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: true, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "build/srw_build-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.log", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
}
}
}

// Run the unified test script
stage('Test') {
agent {
label env.SRW_PLATFORM
}

environment {
SRW_WE2E_EXPERIMENT_BASE_DIR = "${env.WORKSPACE}/experiments"
}

steps {
echo "Testing SRW (${env.SRW_COMPILER}) on ${env.SRW_PLATFORM}"
sh 'bash --login "${WORKSPACE}/.cicd/scripts/srw_test.sh"'

// If executing for a Pull Request, check for the run_we2e_comprehensive_tests label. If the
// label is present, override the value of the SRW_WE2E_COMPREHENSIVE_TESTS parameter
script {
def run_we2e_comprehensive_tests = params.SRW_WE2E_COMPREHENSIVE_TESTS
def run_we2e_comprehensive_tests_label = 'run_we2e_comprehensive_tests'

if (env.CHANGE_ID) {
pullRequest.labels.each {
if (it == run_we2e_comprehensive_tests_label) {
run_we2e_comprehensive_tests = true
}
}
}

sh "SRW_WE2E_COMPREHENSIVE_TESTS=${run_we2e_comprehensive_tests} bash --login ${env.WORKSPACE}/.cicd/scripts/srw_test.sh"
}
}

post {
always {
// Archive the test log files and remove the experiments directory to conserve disk space
sh 'cd "${SRW_WE2E_EXPERIMENT_BASE_DIR}" && tar --create --gzip --verbose --file "${WORKSPACE}/we2e_test_logs-${SRW_PLATFORM}-${SRW_COMPILER}.tgz" */log.generate_FV3LAM_wflow */log.launch_FV3LAM_wflow */log/*'
sh 'rm -rf "${SRW_WE2E_EXPERIMENT_BASE_DIR}"'
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: 'we2e_test_results-*-*.txt', storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'woc-epic-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: 'we2e_test_logs-*-*.tgz', storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
}
}
}
}
}
}
}

/*
post {
always {
// Stop any Parallel Works clusters that were started during the pipeline execution
script {
def pw_clusters = ['pcluster_noaa_v2_use1', 'azcluster_noaa', 'gcluster_noaa_v2_usc1']
// def pw_clusters = ['pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1']
def pw_clusters = ['pclusternoaav2use1']
def clusters = []

// Determine which clusters need to be stopped, if any
Expand All @@ -166,5 +203,4 @@ pipeline {
}
}
}
*/
}
17 changes: 13 additions & 4 deletions .cicd/scripts/srw_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,25 @@ else
workspace="$(cd -- "${script_dir}/../.." && pwd)"
fi

# Normalize Parallel Works cluster platform value.
declare platform
if [[ "${SRW_PLATFORM}" =~ ^(az|g|p)clusternoaa ]]; then
platform='noaacloud'
else
platform="${SRW_PLATFORM}"
fi

build_dir="${workspace}/build"

# Set build related environment variables and load required modules.
source "${workspace}/etc/lmod-setup.sh" "${SRW_PLATFORM}"
source "${workspace}/etc/lmod-setup.sh" "${platform}"
module use "${workspace}/modulefiles"
module load "build_${SRW_PLATFORM}_${SRW_COMPILER}"
module load "build_${platform}_${SRW_COMPILER}"

# Compile SRW application and install to repository root.
mkdir "${build_dir}"
pushd "${build_dir}"
cmake -DCMAKE_INSTALL_PREFIX="${workspace}" -DENABLE_RRFS=on "${workspace}"
make -j "${MAKE_JOBS}"
build_log_file="${build_dir}/srw_build-${platform}-${SRW_COMPILER}.log"
cmake -DCMAKE_INSTALL_PREFIX="${workspace}" "${workspace}" | tee "${build_log_file}"
make -j "${MAKE_JOBS}" | tee --append "${build_log_file}"
popd
Loading

0 comments on commit dd0677b

Please sign in to comment.