Add cicd on merge (#172)
* Add CI/CD on merge

* Test GitHub Action

* Revert
medvedev1088 committed Aug 10, 2023
1 parent c2a81f7 commit 9f3ee20
Showing 6 changed files with 154 additions and 2 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/cicd-on-merge.yml
@@ -0,0 +1,62 @@
---
name: CICD (Merge)
on:
  push:
    branches: [main]

jobs:

  get-updated-dirs:
    runs-on: ubuntu-latest
    outputs:
      dirs: ${{ steps.get-dirs.outputs.dirs }}

    steps:
      - name: Checkout repo
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Get updated directories
        id: get-dirs
        run: |
          DIRS=$(git diff --name-only HEAD~1 HEAD | grep '^airflow/dags/resources/stages/parse/table_definitions' | cut -d"/" -f1-7 | uniq)
          echo "Updated directories: $DIRS"
          sudo apt-get install jq
          DIRS_JSON=$(echo $DIRS | tr '\n' ' ' | jq -R -s -c 'split(" ") | map(select(length > 0))')
          echo "dirs=$DIRS_JSON" >> $GITHUB_OUTPUT

  run:
    needs: [get-updated-dirs]
    if: needs.get-updated-dirs.outputs.dirs != '[]'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        dir: ${{ fromJson(needs.get-updated-dirs.outputs.dirs) }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Build Docker image
        run: |
          docker build -t polygon-etl-parse:latest -f airflow/parse.Dockerfile airflow/.

      - name: Run Docker image
        run: |
          DATASET_NAME=${{ matrix.dir }}
          DATASET_NAME=${DATASET_NAME#airflow/dags/resources/stages/parse/table_definitions/} # this will remove 'airflow/dags/resources/stages/parse/table_definitions/' from start
          echo "$SERVICE_ACCOUNT_PROD" > ./credentials.json
          docker run \
            -v $PWD:/app \
            -e GOOGLE_APPLICATION_CREDENTIALS=/app/credentials.json \
            polygon-etl-parse:latest \
            --project ${{ vars.PARSE_PROJECT }} \
            --dataset_name $DATASET_NAME \
            --dataset_folder "/app/${{ matrix.dir }}" \
            --state_bucket ${{ vars.PARSE_STATE_BUCKET }} \
            --destination_dataset_project_id ${{ vars.PARSE_DESTINATION_DATASET_PROJECT_ID }}
          rm ./credentials.json
        env:
          SERVICE_ACCOUNT_PROD: ${{ secrets.SERVICE_ACCOUNT_PROD }}
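
For reference, a minimal local sketch of what the get-updated-dirs step computes, run against a hypothetical changed file (the eurs directory name is illustrative): cut -d"/" -f1-7 keeps the first seven path segments, i.e. the per-dataset directory under table_definitions, and jq packs the whitespace-separated list into the compact JSON array that fromJson expands into the matrix.

    # Sketch only; CHANGED stands in for the output of `git diff --name-only HEAD~1 HEAD`.
    CHANGED='airflow/dags/resources/stages/parse/table_definitions/eurs/EURS_event_Transfer.json'
    DIRS=$(echo "$CHANGED" | grep '^airflow/dags/resources/stages/parse/table_definitions' | cut -d"/" -f1-7 | uniq)
    echo "$DIRS"       # airflow/dags/resources/stages/parse/table_definitions/eurs
    DIRS_JSON=$(echo $DIRS | tr '\n' ' ' | jq -R -s -c 'split(" ") | map(select(length > 0))')
    echo "$DIRS_JSON"  # ["airflow/dags/resources/stages/parse/table_definitions/eurs"]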
2 changes: 1 addition & 1 deletion airflow/dags/polygonetl_airflow/variables.py
@@ -91,8 +91,8 @@ def read_partition_dag_vars(var_prefix, **kwargs):

def read_parse_dag_vars(var_prefix, **kwargs):
    vars = {
        # source_project_id takes its value from destination_dataset_project_id
        'output_bucket': read_var('output_bucket', var_prefix, True, **kwargs),
        # source_project_id takes its value from destination_dataset_project_id
        'source_project_id': read_var('destination_dataset_project_id', var_prefix, True, **kwargs),
        # internal_project_id takes its value from partitioned_project_id
        'internal_project_id': read_var('partitioned_project_id', var_prefix, True, **kwargs),
@@ -50,5 +50,6 @@
    ],
    "table_description": "",
    "table_name": "EURS_event_Transfer"
  }
  },
  "version": "1"
}
14 changes: 14 additions & 0 deletions airflow/parse.Dockerfile
@@ -0,0 +1,14 @@
# No slim since we need to build psutil (at least on Macbook M1).
FROM python:3.8.12

RUN pip install --upgrade pip

COPY . .

COPY requirements*.txt ./

RUN pip install -r requirements_parse.txt

ENV PYTHONPATH=/dags

ENTRYPOINT ["python", "run_parse_dataset_folder.py"]
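
A quick local smoke test of the image, assuming Docker is installed; the build command is the same one the workflow runs, and --help comes for free from the script's argparse parser.

    docker build -t polygon-etl-parse:latest -f airflow/parse.Dockerfile airflow/.
    docker run polygon-etl-parse:latest --help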
8 changes: 8 additions & 0 deletions airflow/requirements_parse.txt
@@ -0,0 +1,8 @@
eth-hash==0.3.3 # Fixes install conflicts issue in Composer
polygon-etl==0.3.7
web3==5.31.0 # Fixes install conflicts issue in Composer

google-api-core==2.8.1 # matches `composer-2.1.14-airflow-2.5.1`
google-cloud-bigquery==2.34.4 # matches `composer-2.1.14-airflow-2.5.1`
google-cloud-storage==2.7.0 # matches `composer-2.1.14-airflow-2.5.1`
jinja2==3.1.2 # matches `composer-2.1.14-airflow-2.5.1`
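
Outside the image, the same pins install with the command the Dockerfile runs:

    pip install -r requirements_parse.txt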
67 changes: 67 additions & 0 deletions airflow/run_parse_dataset_folder.py
@@ -0,0 +1,67 @@
import argparse
import logging

from google.cloud import bigquery

from polygonetl_airflow.parse.parse_dataset_folder_logic import parse_dataset_folder
from polygonetl_airflow.parse.parse_state_manager import ParseStateManager

# initialize argument parser
parser = argparse.ArgumentParser(
    description="Script for parsing dataset folder with table definitions."
)

parser.add_argument("--project", type=str, help="GCP project ID.", required=True)
parser.add_argument(
    "--dataset_name", type=str, help="Dataset name to be parsed.", required=True
)
parser.add_argument(
    "--dataset_folder", type=str, help="Dataset folder to be parsed.", required=True
)
parser.add_argument(
    "--state_bucket", type=str, help="State bucket.", required=True
)
parser.add_argument(
    "--destination_dataset_project_id",
    type=str,
    help="GCP project of the destination dataset.",
    required=True,
)

args = parser.parse_args()

sqls_folder = "dags/resources/stages/parse/sqls"

project = args.project

dataset_name = args.dataset_name
dataset_folder = args.dataset_folder
state_bucket = args.state_bucket
destination_dataset_project_id = args.destination_dataset_project_id
internal_project_id = destination_dataset_project_id + '-internal'

source_project_id = 'public-data-finance'
source_dataset_name = 'crypto_polygon'

bigquery_client = bigquery.Client(project=project)

logging_format = "%(asctime)s - %(name)s [%(levelname)s] - %(message)s"
logging.basicConfig(level=logging.INFO, format=logging_format)

parse_state_manager = ParseStateManager(
    dataset_name=dataset_name,
    state_bucket=state_bucket,
    bucket_path="parse/state",
    project=project,
)

parse_dataset_folder(
    bigquery_client=bigquery_client,
    dataset_folder=dataset_folder,
    ds=None,
    parse_state_manager=parse_state_manager,
    source_project_id=source_project_id,
    source_dataset_name=source_dataset_name,
    destination_project_id=destination_dataset_project_id,
    internal_project_id=internal_project_id,
    sqls_folder=sqls_folder,
    parse_all_partitions=None,
    only_updated=True,
)
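
The script can also be invoked without Docker, from the airflow/ directory so that the relative sqls_folder path resolves; the flags below mirror the workflow's "Run Docker image" step, with placeholder project and bucket names.

    # All values below are placeholders; point GOOGLE_APPLICATION_CREDENTIALS at a real service account key.
    export GOOGLE_APPLICATION_CREDENTIALS=./credentials.json
    python run_parse_dataset_folder.py \
        --project my-gcp-project \
        --dataset_name eurs \
        --dataset_folder dags/resources/stages/parse/table_definitions/eurs \
        --state_bucket my-state-bucket \
        --destination_dataset_project_id my-destination-project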
