From 17e3c8006fb655b163e7874547f1f5bdfbcde0cd Mon Sep 17 00:00:00 2001 From: Chris Adams Date: Wed, 15 May 2024 11:35:21 +0200 Subject: [PATCH] Add support for working with docker --- .env.docker.sample | 40 ++++++------- Dockerfile | 67 +++++++++++----------- compose.yaml | 1 + docs/deployment.md | 2 - docs/installation.md | 8 ++- docs/working-with-docker.md | 110 ++++++++++++++++++++++++++++++++++++ makefile | 12 ++++ scripts/build_containers.sh | 14 +++++ scripts/install_package.sh | 33 +++++++++++ 9 files changed, 232 insertions(+), 55 deletions(-) create mode 100644 docs/working-with-docker.md create mode 100755 scripts/build_containers.sh create mode 100644 scripts/install_package.sh diff --git a/.env.docker.sample b/.env.docker.sample index 451813b3..27c88547 100644 --- a/.env.docker.sample +++ b/.env.docker.sample @@ -1,17 +1,19 @@ # set the port to listen on PORT=9000 +GUNICORN_BIND_IP=0.0.0.0 PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1 -PYTHONBREAKPOINT="ipdb.set_trace" +PYTHONBREAKPOINT=ipdb.set_trace -SECRET_KEY='some-key' +SECRET_KEY=some-key -DATABASE_URL=mysql://localhost:3306/greencheck -DATABASE_URL_READ_ONLY=mysql://localhost:3306/greencheck +# docker +DATABASE_URL=mysql://deploy:deploy@db:3306/greencheck +DATABASE_URL_READ_ONLY=mysql://deploy:deploy@db:3306/greencheck +EXPLORER_TOKEN=some-token -EXPLORER_TOKEN="some-token" -MAILGUN_API_KEY="50 characters long-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +MAILGUN_API_KEY=50-characters-long-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx # used to stop defaulting to debug DJANGO_SETTINGS_MODULE=greenweb.settings.development @@ -19,7 +21,7 @@ DJANGO_SETTINGS_MODULE=greenweb.settings.development # used for working with notebooks DJANGO_ALLOW_ASYNC_UNSAFE=True -RABBITMQ_URL=amqp://guest:guest@host.docker.internal:5672/ +RABBITMQ_URL=amqp://guest:guest@rabbitmq:5672/ DOMAIN_SNAPSHOT_BUCKET=tgwf-green-domains-dev @@ -27,31 +29,31 @@ OBJECT_STORAGE_INFRA_BUCKET=https://s3.nl-ams.scw.cloud # used for uploading files to object 
storage OBJECT_STORAGE_ENDPOINT=https://s3.nl-ams.scw.cloud OBJECT_STORAGE_REGION=nl-ams -OBJECT_STORAGE_ACCESS_KEY_ID="xxxxxxxxxxxxxxxxxxxx" -OBJECT_STORAGE_SECRET_ACCESS_KEY="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -OBJECT_STORAGE_BUCKET_NAME="tgwf-web-app-xxxx" +OBJECT_STORAGE_ACCESS_KEY_ID=xxxxxxxxxxxxxxxxxxxx +OBJECT_STORAGE_SECRET_ACCESS_KEY=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx +OBJECT_STORAGE_BUCKET_NAME=tgwf-web-app-xxxx # used for geolocation of IP addresses MAXMIND_USER_ID=123456 -MAXMIND_LICENCE_KEY="xxxxxxxxxxxxxxxx" +MAXMIND_LICENCE_KEY=xxxxxxxxxxxxxxxx MICROSOFT_PROVIDER_ID=1234 -MICROSOFT_LOCAL_FILE_DIRECTORY="https://tgwf-web-app-test.s3.nl-ams.scw.cloud/data-imports/ms-azure-ip-ranges-2022-04-25.json" +MICROSOFT_LOCAL_FILE_DIRECTORY=https://tgwf-web-app-test.s3.nl-ams.scw.cloud/data-imports/ms-azure-ip-ranges-2022-04-25.json EQUINIX_PROVIDER_ID=123 -EQUINIX_REMOTE_API_ENDPOINT="https://tgwf-web-app-live.s3.nl-ams.scw.cloud/data-imports/equinix.ips.and.asns.2022-02-25.txt" +EQUINIX_REMOTE_API_ENDPOINT=https://tgwf-web-app-live.s3.nl-ams.scw.cloud/data-imports/equinix.ips.and.asns.2022-02-25.txt AMAZON_PROVIDER_ID=697 -AMAZON_REMOTE_API_ENDPOINT="https://ip-ranges.amazonaws.com/ip-ranges.json" +AMAZON_REMOTE_API_ENDPOINT=https://ip-ranges.amazonaws.com/ip-ranges.json GOOGLE_PROVIDER_ID=597 -GOOGLE_DATASET_ENDPOINT="https://www.gstatic.com/ipranges/cloud.json" +GOOGLE_DATASET_ENDPOINT=https://www.gstatic.com/ipranges/cloud.json # ; used to allow for convenient access to the aws cli -AWS_SHARED_CREDENTIALS_FILE="/absolute/path/to/.aws.credentials" -AWS_CONFIG_FILE="/absolute/path/to/.aws.config" +AWS_SHARED_CREDENTIALS_FILE=/absolute/path/to/.aws.credentials +AWS_CONFIG_FILE=/absolute/path/to/.aws.config -API_URL="https://greenweb.localhost" +API_URL=https://greenweb.localhost -TRELLO_REGISTRATION_EMAIL_TO_BOARD_ADDRESS="mail-to-board@localhost" +TRELLO_REGISTRATION_EMAIL_TO_BOARD_ADDRESS=mail-to-board@localhost diff --git a/Dockerfile b/Dockerfile index 
b6fb9e6b..97aa13c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,67 +1,68 @@ FROM python:3.11 as production - +# Update the package listing, so we know what packages exist RUN apt-get update + +# Install security updates: RUN apt-get upgrade --yes -RUN apt-get install wget --no-install-recommends --yes -RUN wget https://deb.nodesource.com/setup_18.x -O /tmp/setup_18.x.sh --no-check-certificate + +RUN curl -fsSL https://deb.nodesource.com/setup_18.x -o /tmp/setup_18.x.sh RUN bash /tmp/setup_18.x.sh RUN apt-get install nodejs --no-install-recommends --yes +# Delete cached files we don't need anymore +RUN apt-get clean + +# Delete index files we don't need anymore: +RUN rm -rf /var/lib/apt/lists/* # Install dependencies in a virtualenv ENV VIRTUAL_ENV=/app/.venv - RUN useradd deploy --create-home && mkdir /app $VIRTUAL_ENV && chown -R deploy /app $VIRTUAL_ENV WORKDIR /app -# Set default environment variables. They are used at build time and runtime. -# If you specify your own environment variables on Heroku, they will -# override the ones set here. The ones below serve as sane defaults only. -# * PATH - Make sure that Poetry is on the PATH, along with our venv -# * PYTHONPATH - Ensure `django-admin` works correctly. -# * PYTHONUNBUFFERED - This is useful so Python does not hold any messages -# from being output. -# https://docs.python.org/3.9/using/cmdline.html#envvar-PYTHONUNBUFFERED -# https://docs.python.org/3.9/using/cmdline.html#cmdoption-u -# * DJANGO_SETTINGS_MODULE - default settings used in the container. -# * PORT - default port used. Please match with EXPOSE. 
+# Adding the virtual environment to the path saves us needing to +# run `source /app/.venv/bin/activate`, and adding python path +# makes it easier to run manage.py commands ENV PATH=$VIRTUAL_ENV/bin:$PATH \ PYTHONPATH=/app -# PYTHONUNBUFFERED=1 \ -# DJANGO_SETTINGS_MODULE=greenweb.settings.production \ -# PORT=9000 \ -# WEB_CONCURRENCY=3 \ -# GUNICORN_CMD_ARGS="-c gunicorn-conf.py --max-requests 1200 --max-requests-jitter 50 --access-logfile - --timeout 25 --reload" - -# Port exposed by this container. Should default to the port used by your WSGI -# server (Gunicorn). Heroku will ignore this. + +# Default port exposed by this container EXPOSE 9000 +# We don't want to use root. We use this user elsewhere without docker +# so we keep the same name for consistency USER deploy -# Install your app's Python requirements. +# Set up our virtual env directory RUN python -m venv $VIRTUAL_ENV -RUN python -m pip install uv wheel --upgrade +# Add our python libraries for managing dependencies +# uv 0.1.43 is triggering bad certificate errors, so we pin to 0.1.39 +RUN python -m pip install uv==0.1.39 wheel --upgrade -# Copy application code. +# Copy application code, with dockerignore filtering out the stuff we don't want +# from our final build artefact COPY --chown=deploy . . - -# install dependencies via UV +# Install dependencies via uv RUN uv pip install -r requirements/requirements.linux.generated.txt -# set up front end pipeline +# Set up front end pipeline RUN python ./manage.py tailwind install RUN python ./manage.py tailwind build -# # # run npx rollup in correct directory +# Install the other node dependencies +# TODO: we might not need node in production *at all* if we can generate +# the static files in the build step. Something to investigate +WORKDIR /app RUN cd ./apps/theme/static_src/ && \ npx rollup --config -# # TODO Collect static. 
This command will move static files from application -# # directories and "static_compiled" folder to the main static directory that -# # will be served by the WSGI server. +# Collect static files RUN python ./manage.py collectstatic --noinput --clear + +# Use the shell form of CMD, so we have access to our environment variables +# $GUNICORN_CMD_ARGS allows us to add additional arguments to the gunicorn command +CMD gunicorn greenweb.wsgi --bind $GUNICORN_BIND_IP:$PORT --config gunicorn.conf.py $GUNICORN_CMD_ARGS diff --git a/compose.yaml b/compose.yaml index 1e2e06d3..28ff8b77 100644 --- a/compose.yaml +++ b/compose.yaml @@ -35,3 +35,4 @@ services: restart: always depends_on: - db + - rabbitmq diff --git a/docs/deployment.md b/docs/deployment.md index d16ba93b..5550da57 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -256,6 +256,4 @@ As mentioned before, we use systemd to run our both our workers and web server p ### Gunicorn logging -By default, gunicorn, our web server logs at the `INFO` level. This means successful requests are not logged, and only errors (with the status code 5xx) or not found requests (4xx) show up in logs. - The logs on each app server are sent to our the Loki server on our monitoring node, accessible at https://grafana.greenweb.org. This allow for centralised querying of logs. diff --git a/docs/installation.md b/docs/installation.md index 13e456a6..7ee837ec 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -6,6 +6,7 @@ In order to setup this project, we recommend following the guides from top to bo --- ## Development on a remote machine using Gitpod + ### Why Gitpod This is the main supported approach for setting up a development environment. You can use other approaches, but we may not be able to provide as much support. Gitpod will spin and configure a development environment for you, to which you can connect either from your IDE or from the browser. 
By choosing this approach you don't have to manually install any dependencies. @@ -20,13 +21,14 @@ In an isolated [virtual environment](https://docs.python.org/3/tutorial/venv.htm ### Prerequisites If you decide to go with this approach, you need to make sure you have the system dependencies installed. Use this command (if your OS uses `apt`) or equivalent for your operating system: + ``` sudo apt install python3 python3-dev build-essential libmariadb3 libmariadb-dev ``` __Note__ In the context of development, it is recommended to manage Python versions using [`pyenv`](https://github.com/pyenv/pyenv) instead of relying on the version shipped in the operating system. -__Note__ Currently Python version 3.11.9 is used on production. +__Note__ Currently Python version 3.11.9 is used in production. ### Setup Before following the following list, make sure you are in the root directory (workspace/admin-portal). @@ -51,3 +53,7 @@ Will run my-command with all the environment variables in .env set. ## Working with email This project has features that send email notifications to users. To test email functionality, this project uses [Mailhog](https://github.com/mailhog/MailHog). It's enabled by default in Gitpod environments, and you can access it on port 8025. + +## Working with Docker + +If you prefer working with docker, there are instructions for spinning up a local environment with `docker compose` and building docker images. See [working with docker](working-with-docker.md) for more. diff --git a/docs/working-with-docker.md b/docs/working-with-docker.md new file mode 100644 index 00000000..248c6146 --- /dev/null +++ b/docs/working-with-docker.md @@ -0,0 +1,110 @@ +# Working with Docker + +We use Docker to create packaged versions of the platform for use where a dockerised version of this code base is expected. 
+ +The key scenarios where we would use this are: + +- in local development with docker compose +- where running code is assumed to be ephemeral, like a 'serverless' service (for example, AWS Lambda, Fly.io, Scaleway Serverless Containers, Google Cloud Run, etc.) +- in tooling used to understand the resource usage of the platform, like Green Coding Solution's Green Metrics Tool (GMT) + + +### How to use docker and docker compose for development + +First, make sure you have docker or docker-compatible software installed (Orbstack on MacOS is a good example of the latter). + +The Green Web Platform is comprised of three main services: + +- a WSGI django application: served by the Gunicorn webserver +- a message queue: RabbitMQ +- a database: MariaDB + +This topology is represented in our Gitpod development environment, but also the Docker Compose file, `compose.yaml`, which is consumed by the Green Metrics Tool for testing runs. + +#### Running a local version of the full system with docker compose + +You can spin up a local version of the setup above with Docker Compose by checking you are in the project root directory, and calling: + +```shell +# build the images locally, fetching the other images where needed +docker compose build +# run the services and main django application +docker compose up +``` + +This will download the various images needed, then start them as separate docker containers. By default the project's `./apps` directory, containing most of the django code, and the `./greenweb` directory are mounted into the running django container, allowing you to make changes. + +Similarly, a `.env.docker` file is used to provide the environment variables that would be present in production, or in other development environments. See `.env.docker.sample` for an annotated list of the expected environment variables. + +```yml +# abridged file - see compose.yaml for more details +django: + env_file: + - path: ./.env.docker + build: + context: . 
+ dockerfile: Dockerfile + container_name: greenweb-app + image: greenweb-app + expose: + - 9000 + ports: + - 9000:9000 + volumes: + - ./apps:/app/apps + - ./greenweb:/app/greenweb + restart: always + depends_on: + - db + - rabbitmq +``` + +#### Running a local version of the django app with docker + +The green web platform is designed so that the different parts can be run on different servers, in various configurations. + +To run just the django application, once the image is built, you can run it like so: + +``` +docker run --env-file .env.prod.docker -it greenweb-app bash +``` + +This will log you into the running container, where you can run `gunicorn` to serve web requests, and put greenchecks onto a message queue for looking up: + +``` +gunicorn --bind 0.0.0.0:9000 --config gunicorn.conf.py greenweb.wsgi +``` + + +Or you can run the `dramatiq` workers that take domains off the message queue, look them up, and write the results to the lookup table: + +```shell +# run dramatiq, with one thread and one process, listening on all queues +python ./manage.py rundramatiq --threads 1 --process 1 +``` + +### Making new images + +If you are using docker you will at some point need to make new images. + +Running long commands in the terminal gets tedious, so see the `build_containers` script for an annotated example of building a docker image. + +There is a Makefile task intended for automating the process of creating builds - see `docker.build` + + + +### Publish an image + +Once you have an image built, to use it outside a development environment it needs to be accessible over the internet. At a high level the steps are: + +1. build the image +1. tag the image +1. push the image to a registry + +There is a Makefile task set up for this - see `docker.release` - this tags and pushes a built image to a docker repository hosted by Scaleway. You will need to be authenticated first. 
+ +```shell +docker login / -u nologin +``` + +You will be prompted for a password - if you don't have access to these credentials, please contact one of the green web staff. diff --git a/makefile b/makefile index 91921d17..c33f46a2 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,8 @@ .PHONY: release venv +# used for tagging our docker images +TAG ?= $(shell echo $$APP_RELEASE)-$(shell git log -n 1 --format=%h) + # Create Python virtual environment if not yet created. venv: test -d .venv || python -m venv .venv @@ -58,3 +61,12 @@ docs: # Build the documentation using Sphinx and keep updating it on every change docs.watch: dotenv run -- sphinx-autobuild ./docs _build/ + +# make a docker image for publishing to our registry, using the project root as build context +docker.build: + docker build -t $(APP_NAME) . + +# Tag the locally built image with the registry name and TAG, then push that same reference +docker.release: + docker tag $(APP_NAME) $(DOCKER_REGISTRY)/$(APP_NAME):$(TAG) + docker push $(DOCKER_REGISTRY)/$(APP_NAME):$(TAG) diff --git a/scripts/build_containers.sh b/scripts/build_containers.sh new file mode 100755 index 00000000..8426464d --- /dev/null +++ b/scripts/build_containers.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# this is a convenience script when iterating on container builds +# most of the time we would use the makefile docker.build, but +# this is intended for debugging, and logging into containers +# when inspecting their contents + +set -euo pipefail + +# create our container +docker build . --tag greenweb-app + +# uncomment to log into our newly created container +# docker run --env-file .env.docker --interactive --tty greenweb-app bash diff --git a/scripts/install_package.sh b/scripts/install_package.sh new file mode 100644 index 00000000..e3ed1473 --- /dev/null +++ b/scripts/install_package.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# This script is intended to be run inside a Debian or Ubuntu container +# to install a package. 
+ +# Annotated to explain common good practices to keep image sizes small + +# Bash "strict mode", to help catch problems and bugs in the shell +# script. Every bash script you write should include this. See +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ for +# details. +set -euo pipefail + +# Tell apt-get we're never going to be able to give manual +# feedback: +export DEBIAN_FRONTEND=noninteractive + +# Update the package listing, so we know what packages exist: +apt-get update + +# Install security updates: +apt-get -y upgrade + +# Install a new package (syslog-ng here), without unnecessary recommended packages: +apt-get -y install --no-install-recommends syslog-ng + +# Delete cached files we don't need anymore (note that if you're +# using official Docker images for Debian or Ubuntu, this happens +# automatically, you don't need to do it yourself): +apt-get clean + +# Delete index files we don't need anymore: +rm -rf /var/lib/apt/lists/*