ci: mirror-data-to-staging-on-deploy #6044

Draft: wants to merge 15 commits into base: main
63 changes: 62 additions & 1 deletion .github/workflows/deploy-happy-stack.yml
@@ -1,6 +1,13 @@
name: Deploy Happy

on: deployment
# TODO: Reinstate. This is for temporarily testing mirroring job
#on: deployment

# TODO: Remove. This is for temporarily testing mirroring job
on:
push:
branches:
- atol/5480-mirror-data-to-staging-on-deploy

env:
DOCKER_BUILDKIT: 1
@@ -13,8 +20,54 @@ permissions:
deployments: write

jobs:
mirror-prod-rds-data-to-staging:
runs-on: ubuntu-22.04
# TODO: Reinstate. This is for temporarily testing mirroring job
# if: github.event.deployment.environment == 'staging'
steps:
- name: install requirements
run: |
pip install awscli==1.29.1

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: us-west-2
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 2700
- name: Install aws cli
uses: unfor19/install-aws-cli-action@v1
with:
version: "2"
- name: Dump from prod # TODO: Add actual mirroring command as ECS task execution
run: echo TODO
- name: Load to staging # TODO: Add actual mirroring command as ECS task execution
run: echo TODO

mirror-prod-s3-data-to-staging:
runs-on: ubuntu-22.04
# TODO: Reinstate. This is for temporarily testing mirroring job
# if: github.event.deployment.environment == 'staging'
steps:
- name: install requirements
run: |
pip install awscli==1.29.1
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: us-west-2
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
role-duration-seconds: 2700
- name: Install aws cli
uses: unfor19/install-aws-cli-action@v1
with:
version: "2"
- name: Run S3 mirroring # TODO: Add actual mirroring command as ECS task execution
run: echo TODO

upgrade:
runs-on: ubuntu-22.04
if: false # TODO: Remove. This is for temporarily testing mirroring job, above, avoiding the deploy
steps:
- uses: actions/setup-node@v2
with:
@@ -133,6 +186,8 @@ jobs:
runs-on: ubuntu-22.04
needs:
- upgrade
- mirror-prod-s3-data-to-staging
- mirror-prod-rds-data-to-staging
steps:
- uses: actions/setup-node@v2
with:
@@ -203,6 +258,8 @@ jobs:
runs-on: ubuntu-22.04
needs:
- upgrade
- mirror-prod-s3-data-to-staging
- mirror-prod-rds-data-to-staging
strategy:
fail-fast: false
matrix:
@@ -303,6 +360,8 @@ jobs:
timeout-minutes: 30
needs:
- upgrade
- mirror-prod-s3-data-to-staging
- mirror-prod-rds-data-to-staging
steps:
- name: Configure AWS Prod Credentials
uses: aws-actions/configure-aws-credentials@v2
@@ -358,6 +417,8 @@ jobs:
timeout-minutes: 30
needs:
- upgrade
- mirror-prod-s3-data-to-staging
- mirror-prod-rds-data-to-staging
steps:
- name: Configure AWS Prod Credentials
uses: aws-actions/configure-aws-credentials@v2
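Note: the "Dump from prod", "Load to staging", and "Run S3 mirroring" steps above are still echo TODO placeholders. A minimal sketch of what such a step might run once the ECS wiring lands, assuming values not present in this PR (CLUSTER, STACK_NAME, SUBNET_IDS, and SECURITY_GROUP_IDS are hypothetical; only the task definition family pattern and the "env-mirroring" container name come from the env-mirroring-ecs module below):

# Hypothetical sketch; cluster and network values are assumptions, not from this PR.
TASK_ARN=$(aws ecs run-task \
  --cluster "$CLUSTER" \
  --launch-type FARGATE \
  --task-definition "dp-${DEPLOYMENT_STAGE}-${STACK_NAME}-env-mirroring" \
  --network-configuration "awsvpcConfiguration={subnets=[$SUBNET_IDS],securityGroups=[$SECURITY_GROUP_IDS],assignPublicIp=DISABLED}" \
  --overrides '{"containerOverrides":[{"name":"env-mirroring","environment":[{"name":"GHA_COMMIT","value":"'"$GITHUB_SHA"'"}]}]}' \
  --query 'tasks[0].taskArn' --output text)

# Wait for the task to stop, then fail the step if the container exited non-zero.
aws ecs wait tasks-stopped --cluster "$CLUSTER" --tasks "$TASK_ARN"
EXIT_CODE=$(aws ecs describe-tasks --cluster "$CLUSTER" --tasks "$TASK_ARN" \
  --query 'tasks[0].containers[0].exitCode' --output text)
test "$EXIT_CODE" = "0"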
13 changes: 7 additions & 6 deletions .github/workflows/push-tests.yml
@@ -1,12 +1,13 @@
name: Push Tests

on:
pull_request:
branches:
- "*"
push:
branches:
- "main"
# TODO: revert
# pull_request:
# branches:
# - "*"
# push:
# branches:
# - "main"

env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
1 change: 1 addition & 0 deletions .github/workflows/rdev-update-for-pr.yml
@@ -1,6 +1,7 @@
name: Create/Update rdev for PR

on:
if: false # TODO: revert
pull_request:
branches-ignore:
- prod
37 changes: 37 additions & 0 deletions .happy/terraform/modules/ecs-stack/main.tf
@@ -32,6 +32,12 @@ locals {
"--keep-alive", "61", "--log-level", "info"]
data_load_path = "s3://${local.secret["s3_buckets"]["env"]["name"]}/database/seed_data_04_2f30f3bcc9aa.sql"

# env mirroring
env_mirroring_db_dump_cmd = ["/single-cell-data-portal/backend/scripts/env_mirroring_aws_ecs/db_dump.sh"]
env_mirroring_db_load_cmd = ["/single-cell-data-portal/backend/scripts/env_mirroring_aws_ecs/db_load.sh"]
env_mirroring_s3_sync_cmd = ["/single-cell-data-portal/backend/scripts/env_mirroring_aws_ecs/s3_sync.sh"]
env_mirroring_db_dump_s3_uri = "s3://${local.secret["s3_buckets"]["env"]["name"]}/database/env_mirroring_aws_ecs/prod.pg_dump"

vpc_id = local.secret["cloud_env"]["vpc_id"]
subnets = local.secret["cloud_env"]["private_subnets"]
security_groups = local.secret["security_groups"]
@@ -228,6 +234,37 @@ module cg_batch {
batch_container_memory_limit = var.cg_batch_container_memory_limit
}

module env_mirroring_db_dump {
source = "../env-mirroring-ecs"
image = "${local.backend_image_repo}:${local.image_tag}"
task_role_arn = local.ecs_role_arn
cmd = local.env_mirroring_db_dump_cmd
custom_stack_name = local.custom_stack_name
deployment_stage = local.deployment_stage
execution_role = local.ecs_execution_role
db_dump_s3_uri = local.env_mirroring_db_dump_s3_uri
}

module env_mirroring_db_load {
source = "../env-mirroring-ecs"
image = "${local.backend_image_repo}:${local.image_tag}"
task_role_arn = local.ecs_role_arn
cmd = local.env_mirroring_db_load_cmd
custom_stack_name = local.custom_stack_name
deployment_stage = local.deployment_stage
execution_role = local.ecs_execution_role
db_dump_s3_uri = local.env_mirroring_db_dump_s3_uri
}

module env_mirroring_s3_sync {
source = "../env-mirroring-ecs"
image = "${local.backend_image_repo}:${local.image_tag}"
task_role_arn = local.ecs_role_arn
cmd = local.env_mirroring_s3_sync_cmd
custom_stack_name = local.custom_stack_name
deployment_stage = local.deployment_stage
execution_role = local.ecs_execution_role
}

module upload_success_lambda {
source = "../lambda"
63 changes: 63 additions & 0 deletions .happy/terraform/modules/env-mirroring-ecs/main.tf
@@ -0,0 +1,63 @@
# ECS task definition for Environment Data Mirroring task (for CI/CD, invoked by GHA)

data aws_region current {}

locals {
job_name = "env-mirroring"
}

resource aws_ecs_task_definition task_definition {
family = "dp-${var.deployment_stage}-${var.custom_stack_name}-${local.job_name}"
network_mode = "awsvpc"
cpu = 2048
memory = 4096
task_role_arn = var.task_role_arn
execution_role_arn = var.execution_role
requires_compatibilities = [ "FARGATE" ]
container_definitions = <<EOF
[
{
"name": "${local.job_name}",
"essential": true,
"image": "${var.image}",
"memory": 512,
"environment": [
{
"name": "AWS_REGION",
"value": "${data.aws_region.current.name}"
},
{
"name": "AWS_DEFAULT_REGION",
"value": "${data.aws_region.current.name}"
},
{
"name": "REMOTE_DEV_PREFIX",
"value": "${var.remote_dev_prefix}"
},
{
"name": "DEPLOYMENT_STAGE",
"value": "${var.deployment_stage}"
},
{
"name": "DB_DUMP_S3_URI",
"value": "${var.db_dump_s3_uri}"
}
],
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-stream-prefix": "fargate",
"awslogs-group": "${aws_cloudwatch_log_group.cloud_watch_logs_group.id}",
"awslogs-region": "${data.aws_region.current.name}"
}
},
"command": ${jsonencode(var.cmd)}
}
]
EOF
}

resource aws_cloudwatch_log_group cloud_watch_logs_group {
retention_in_days = 365
name = "/dp/${var.deployment_stage}/${var.custom_stack_name}/${local.job_name}"
}
4 changes: 4 additions & 0 deletions .happy/terraform/modules/env-mirroring-ecs/outputs.tf
@@ -0,0 +1,4 @@
output task_definition_arn {
value = aws_ecs_task_definition.task_definition.arn
description = "ARN of the env-mirroring ECS Task Definition"
}
42 changes: 42 additions & 0 deletions .happy/terraform/modules/env-mirroring-ecs/variables.tf
@@ -0,0 +1,42 @@
variable image {
type = string
description = "Image name"
}

variable task_role_arn {
type = string
description = "ARN for the role assumed by tasks"
}

variable execution_role {
type = string
description = "Execution role to use for fargate tasks - required for fargate services!"
}

variable cmd {
type = list(string)
description = "Command to run"
default = []
}

variable custom_stack_name {
type = string
description = "Please provide the stack name"
}

variable remote_dev_prefix {
type = string
description = "S3 storage path / db schema prefix"
default = ""
}

variable db_dump_s3_uri {
type = string
description = "S3 location where a pg_dump file is located for dumping from one env and restoring to another"
default = ""
}

variable deployment_stage {
type = string
description = "The name of the deployment stage of the Application"
}
22 changes: 22 additions & 0 deletions backend/scripts/env_mirroring_aws_ecs/db_dump.sh
@@ -0,0 +1,22 @@
#!/usr/bin/env bash

# Dump RDS (postgres db) data from the production env to a db dump file stored on S3.
#
# This script is intended to be run as an AWS ECS task, where the IAM role is appropriately configured to allow
# prod env RDS access.

set -e

LOCAL_DB_DUMP_FILE=db_dump.psql
if [[ -z $DEPLOYMENT_STAGE ]]; then
echo "DEPLOYMENT_STAGE is not set"
exit 1
fi
if [[ -z $DB_DUMP_S3_URI ]]; then
echo "DB_DUMP_S3_URI is not set"
exit 1
fi
if [[ -z $GHA_COMMIT ]]; then
echo "GHA_COMMIT is not set"
exit 1
fi

eval `aws secretsmanager get-secret-value --secret-id corpora/backend/prod/database --region us-west-2 | jq -r '.SecretString' | jq -r '.database_uri | match("postgresql://(.+):(.+)@(.+)/(.+)").captures | "DB_USER=\(.[0].string) PGPASSWORD=\(.[1].string) DB_HOST=\(.[2].string) DB_NAME=\(.[3].string)"'`
pg_dump -Fc --host $DB_HOST --dbname=$DB_NAME --username $DB_USER --file=$LOCAL_DB_DUMP_FILE
aws s3 cp $LOCAL_DB_DUMP_FILE ${DB_DUMP_S3_URI}.${GHA_COMMIT}
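For illustration, here is what the eval over the Secrets Manager secret above produces, using purely hypothetical credential values:

# Hypothetical database_uri stored in the secret (not a real credential):
#   postgresql://corpora_user:examplepw@db-prod.example.us-west-2.rds.amazonaws.com/corpora_db
# The jq match + eval above would then export:
#   DB_USER=corpora_user
#   PGPASSWORD=examplepw   (read by pg_dump via libpq, so no password prompt)
#   DB_HOST=db-prod.example.us-west-2.rds.amazonaws.com
#   DB_NAME=corpora_db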
32 changes: 32 additions & 0 deletions backend/scripts/env_mirroring_aws_ecs/db_load.sh
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

# Load RDS (postgres db) data from a (prod) db dump on S3
# into a specified destination deployment environment (dev or staging).
#
# THIS IS DESTRUCTIVE for the destination env! The source env will
# never be modified, but the dest env's data will be replaced.
#
# This script is intended to be run as an AWS ECS task, where the IAM role is appropriately configured to allow
# dest env (dev or staging) RDS access and read access to the db dump on S3.

set -e

LOCAL_DB_DUMP_FILE=db_dump.psql
if [[ -z $DEPLOYMENT_STAGE ]]; then
echo "DEPLOYMENT_STAGE is not set"
exit 1
fi
if [[ $DEPLOYMENT_STAGE == "prod" ]]; then
echo "DEPLOYMENT_STAGE is set to prod, which is not allowed"
exit 1
fi
if [[ -z $DB_DUMP_S3_URI ]]; then
echo "DB_DUMP_S3_URI is not set"
exit 1
fi
if [[ -z $GHA_COMMIT ]]; then
echo "GHA_COMMIT is not set"
exit 1
fi

eval `aws secretsmanager get-secret-value --secret-id corpora/backend/${DEPLOYMENT_STAGE}/database --region us-west-2 | jq -r '.SecretString' | jq -r '.database_uri | match("postgresql://(.+):(.+)@(.+)/(.+)").captures | "DB_USER=\(.[0].string) PGPASSWORD=\(.[1].string) DB_HOST=\(.[2].string) DB_NAME=\(.[3].string)"'`
aws s3 cp ${DB_DUMP_S3_URI}.${GHA_COMMIT} $LOCAL_DB_DUMP_FILE  # use the commit-suffixed dump uploaded by db_dump.sh
# TODO: Remove echo statements once we're sure this is working
echo pg_restore --host $DB_HOST --dbname=$DB_NAME --username $DB_USER --clean --if-exists --no-owner --no-privileges --no-comments --schema=persistence_schema db_dump.psql
echo psql --host $DB_HOST --dbname=$DB_NAME --username $DB_USER -c "UPDATE persistence_schema.\"DatasetArtifact\" SET uri = regexp_replace(uri, '(s3:\\/\\/)([[:alpha:]]+-[[:alpha:]]+-)([[:alpha:]]+)(\\/.+)', '\\1\\2staging\\4') WHERE uri IS NOT NULL;"
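For clarity, the (currently echoed) psql UPDATE rewrites only the environment segment of each artifact bucket; with a hypothetical artifact URI the regexp_replace behaves like this:

# Hypothetical before/after for the regexp_replace above:
#   before: s3://corpora-data-prod/1111aaaa/local.h5ad
#   after:  s3://corpora-data-staging/1111aaaa/local.h5ad
# Group 1 keeps "s3://", group 2 keeps "corpora-data-", the env segment is replaced with
# "staging", and group 4 keeps the object key.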
28 changes: 28 additions & 0 deletions backend/scripts/env_mirroring_aws_ecs/s3_sync.sh
@@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Mirror S3 data from a source deployment environment (usually production)
# to a specified destination deployment environment (dev or staging)
#
# This will *add* S3 objects to the dest env, but will not remove existing objects.
# The src S3 buckets will never be modified.
#
# This script is intended to be run as an AWS ECS task, where the IAM role is appropriately configured to allow
# prod env S3 bucket reads and dest env S3 bucket writes (dev or staging).

set -e

if [[ -z $DEPLOYMENT_STAGE ]]; then
echo "DEPLOYMENT_STAGE is not set"
exit 1
fi

if [[ $DEPLOYMENT_STAGE == "prod" ]]; then
echo "DEPLOYMENT_STAGE is set to prod, which is not allowed"
exit 1
fi

PARTITION_PREFIXES="0 1 2 3 4 5 6 7 8 9 a b c d e f"
S3_SYNC_CMD="/usr/local/bin/aws s3 sync --copy-props metadata-directive --no-progress"
# TODO: Remove echo statements once we're sure this is working
echo parallel --line-buffer --jobs 16 $S3_SYNC_CMD s3://corpora-data-prod/ s3://corpora-data-${DEPLOYMENT_STAGE}/ --exclude "'*'" --include "'{}*'" --exclude "'*loom'" ::: $PARTITION_PREFIXES
echo parallel --line-buffer --jobs 16 $S3_SYNC_CMD s3://hosted-cellxgene-prod/ s3://hosted-cellxgene-${DEPLOYMENT_STAGE}/ --exclude "'*'" --include "'{}*'" ::: $PARTITION_PREFIXES
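For illustration, GNU parallel substitutes each partition prefix for {} in turn, so (assuming DEPLOYMENT_STAGE=staging) the first echoed command would fan out into 16 concurrent syncs of roughly this shape:

# One of the 16 sync jobs (prefix "0" shown; the others use 1-9 and a-f):
/usr/local/bin/aws s3 sync --copy-props metadata-directive --no-progress \
  s3://corpora-data-prod/ s3://corpora-data-staging/ \
  --exclude '*' --include '0*' --exclude '*loom'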