From 17e3c8006fb655b163e7874547f1f5bdfbcde0cd Mon Sep 17 00:00:00 2001
From: Chris Adams <chris@productscience.co.uk>
Date: Wed, 15 May 2024 11:35:21 +0200
Subject: [PATCH] Add support for working with docker

---
 .env.docker.sample          |  40 ++++++-------
 Dockerfile                  |  67 +++++++++++-----------
 compose.yaml                |   1 +
 docs/deployment.md          |   2 -
 docs/installation.md        |   8 ++-
 docs/working-with-docker.md | 110 ++++++++++++++++++++++++++++++++++++
 makefile                    |  12 ++++
 scripts/build_containers.sh |  14 +++++
 scripts/install_package.sh  |  33 +++++++++++
 9 files changed, 232 insertions(+), 55 deletions(-)
 create mode 100644 docs/working-with-docker.md
 create mode 100755 scripts/build_containers.sh
 create mode 100644 scripts/install_package.sh

diff --git a/.env.docker.sample b/.env.docker.sample
index 451813b3..27c88547 100644
--- a/.env.docker.sample
+++ b/.env.docker.sample
@@ -1,17 +1,19 @@
 # set the port to listen on
 PORT=9000
+GUNICORN_BIND_IP=0.0.0.0
 
 PYTHONDONTWRITEBYTECODE=1
 PYTHONUNBUFFERED=1
-PYTHONBREAKPOINT="ipdb.set_trace"
+PYTHONBREAKPOINT=ipdb.set_trace
 
-SECRET_KEY='some-key'
+SECRET_KEY=some-key
 
-DATABASE_URL=mysql://localhost:3306/greencheck
-DATABASE_URL_READ_ONLY=mysql://localhost:3306/greencheck
+# docker: "db" below is the hostname of the database service in compose.yaml
+DATABASE_URL=mysql://deploy:deploy@db:3306/greencheck
+DATABASE_URL_READ_ONLY=mysql://deploy:deploy@db:3306/greencheck
+EXPLORER_TOKEN=some-token
 
-EXPLORER_TOKEN="some-token"
-MAILGUN_API_KEY="50 characters long-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+MAILGUN_API_KEY=50-characters-long-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 
 # used to stop defaulting to debug
 DJANGO_SETTINGS_MODULE=greenweb.settings.development
@@ -19,7 +21,7 @@ DJANGO_SETTINGS_MODULE=greenweb.settings.development
 # used for working with notebooks
 DJANGO_ALLOW_ASYNC_UNSAFE=True
 
-RABBITMQ_URL=amqp://guest:guest@host.docker.internal:5672/
+RABBITMQ_URL=amqp://guest:guest@rabbitmq:5672/
 
 
 DOMAIN_SNAPSHOT_BUCKET=tgwf-green-domains-dev
@@ -27,31 +29,31 @@ OBJECT_STORAGE_INFRA_BUCKET=https://s3.nl-ams.scw.cloud
 # used for uploading files to object storage
 OBJECT_STORAGE_ENDPOINT=https://s3.nl-ams.scw.cloud
 OBJECT_STORAGE_REGION=nl-ams
-OBJECT_STORAGE_ACCESS_KEY_ID="xxxxxxxxxxxxxxxxxxxx"
-OBJECT_STORAGE_SECRET_ACCESS_KEY="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
-OBJECT_STORAGE_BUCKET_NAME="tgwf-web-app-xxxx"
+OBJECT_STORAGE_ACCESS_KEY_ID=xxxxxxxxxxxxxxxxxxxx
+OBJECT_STORAGE_SECRET_ACCESS_KEY=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+OBJECT_STORAGE_BUCKET_NAME=tgwf-web-app-xxxx
 
 # used for geolocation of IP addresses
 MAXMIND_USER_ID=123456
-MAXMIND_LICENCE_KEY="xxxxxxxxxxxxxxxx"
+MAXMIND_LICENCE_KEY=xxxxxxxxxxxxxxxx
 
 MICROSOFT_PROVIDER_ID=1234
-MICROSOFT_LOCAL_FILE_DIRECTORY="https://tgwf-web-app-test.s3.nl-ams.scw.cloud/data-imports/ms-azure-ip-ranges-2022-04-25.json"
+MICROSOFT_LOCAL_FILE_DIRECTORY=https://tgwf-web-app-test.s3.nl-ams.scw.cloud/data-imports/ms-azure-ip-ranges-2022-04-25.json
 
 EQUINIX_PROVIDER_ID=123
-EQUINIX_REMOTE_API_ENDPOINT="https://tgwf-web-app-live.s3.nl-ams.scw.cloud/data-imports/equinix.ips.and.asns.2022-02-25.txt"
+EQUINIX_REMOTE_API_ENDPOINT=https://tgwf-web-app-live.s3.nl-ams.scw.cloud/data-imports/equinix.ips.and.asns.2022-02-25.txt
 
 AMAZON_PROVIDER_ID=697
-AMAZON_REMOTE_API_ENDPOINT="https://ip-ranges.amazonaws.com/ip-ranges.json"
+AMAZON_REMOTE_API_ENDPOINT=https://ip-ranges.amazonaws.com/ip-ranges.json
 
 GOOGLE_PROVIDER_ID=597
-GOOGLE_DATASET_ENDPOINT="https://www.gstatic.com/ipranges/cloud.json"
+GOOGLE_DATASET_ENDPOINT=https://www.gstatic.com/ipranges/cloud.json
 
 
 # ; used to allow for convenient access to the aws cli
-AWS_SHARED_CREDENTIALS_FILE="/absolute/path/to/.aws.credentials"
-AWS_CONFIG_FILE="/absolute/path/to/.aws.config"
+AWS_SHARED_CREDENTIALS_FILE=/absolute/path/to/.aws.credentials
+AWS_CONFIG_FILE=/absolute/path/to/.aws.config
 
-API_URL="https://greenweb.localhost"
+API_URL=https://greenweb.localhost
 
-TRELLO_REGISTRATION_EMAIL_TO_BOARD_ADDRESS="mail-to-board@localhost"
+TRELLO_REGISTRATION_EMAIL_TO_BOARD_ADDRESS=mail-to-board@localhost
diff --git a/Dockerfile b/Dockerfile
index b6fb9e6b..97aa13c5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,67 +1,68 @@
 FROM python:3.11 as production
 
-
+# Update the package listing, so we know what packages exist
 RUN apt-get update
+
+# Install security updates:
 RUN apt-get upgrade --yes
-RUN apt-get install wget --no-install-recommends --yes
-RUN wget https://deb.nodesource.com/setup_18.x -O /tmp/setup_18.x.sh --no-check-certificate
+# Fetch the NodeSource setup script: -f fails the build on HTTP errors, -L follows redirects
+RUN curl -fsSL https://deb.nodesource.com/setup_18.x -o /tmp/setup_18.x.sh
 RUN bash /tmp/setup_18.x.sh
 RUN apt-get install nodejs --no-install-recommends --yes
 
+# Delete cached files we don't need anymore
+RUN apt-get clean
+
+# Delete index files we don't need anymore:
+RUN rm -rf /var/lib/apt/lists/*
 
 # Install dependencies in a virtualenv
 ENV VIRTUAL_ENV=/app/.venv
-
 RUN useradd deploy --create-home && mkdir /app $VIRTUAL_ENV && chown -R deploy /app $VIRTUAL_ENV
 
 WORKDIR /app
 
-# Set default environment variables. They are used at build time and runtime.
-# If you specify your own environment variables on Heroku, they will
-# override the ones set here. The ones below serve as sane defaults only.
-#  * PATH - Make sure that Poetry is on the PATH, along with our venv
-#  * PYTHONPATH - Ensure `django-admin` works correctly.
-#  * PYTHONUNBUFFERED - This is useful so Python does not hold any messages
-#    from being output.
-#    https://docs.python.org/3.9/using/cmdline.html#envvar-PYTHONUNBUFFERED
-#    https://docs.python.org/3.9/using/cmdline.html#cmdoption-u
-#  * DJANGO_SETTINGS_MODULE - default settings used in the container.
-#  * PORT - default port used. Please match with EXPOSE.
+# Adding the virtual environment to the path saves us needing to 
+# run `source /app/.venv/bin/activate`, and adding python path
+# makes it easier to run manage.py commands
 ENV PATH=$VIRTUAL_ENV/bin:$PATH \
     PYTHONPATH=/app
-#     PYTHONUNBUFFERED=1 \
-#     DJANGO_SETTINGS_MODULE=greenweb.settings.production \
-#     PORT=9000 \
-#     WEB_CONCURRENCY=3 \
-#     GUNICORN_CMD_ARGS="-c gunicorn-conf.py --max-requests 1200 --max-requests-jitter 50 --access-logfile - --timeout 25 --reload"
-
-# Port exposed by this container. Should default to the port used by your WSGI
-# server (Gunicorn). Heroku will ignore this.
+
+# Default port exposed by this container
 EXPOSE 9000
 
+# We don't want to use root. We use this user elsewhere without docker
+# so we keep the same name for consistency
 USER deploy
 
-# Install your app's Python requirements.
+# Set up our virtual env directory
 RUN python -m venv $VIRTUAL_ENV
-RUN python -m pip install uv wheel --upgrade
 
+# Add our python libraries for managing dependencies
+# uv 0.1.43 is triggering bad certificate errors, so we pin to 0.1.39
+RUN python -m pip install uv==0.1.39 wheel --upgrade
 
-# Copy application code.
+# Copy application code, with dockerignore filtering out the stuff we don't want
+# from our final build artefact
 COPY --chown=deploy . .
 
-
-# install dependencies via UV
+# Install dependencies via uv
 RUN uv pip install -r requirements/requirements.linux.generated.txt 
 
-# set up front end pipeline
+# Set up front end pipeline
 RUN python ./manage.py tailwind install
 RUN python ./manage.py tailwind build
 
-# # # run npx rollup in correct directory
+# Install the other node dependencies
+# TODO: we might not need node in production *at all* if we can generate 
+# the static files in the build step. Something to investigate
+WORKDIR /app
 RUN cd ./apps/theme/static_src/ && \
     npx rollup --config
 
-# # TODO Collect static. This command will move static files from application
-# # directories and "static_compiled" folder to the main static directory that
-# # will be served by the WSGI server.
+# Collect static files
 RUN python ./manage.py collectstatic --noinput --clear
+
+# Shell form of CMD, so our environment variables are expanded; `exec` makes gunicorn PID 1 so it receives stop signals.
+# Bind address and port fall back to 0.0.0.0:9000 if unset; $GUNICORN_CMD_ARGS adds extra gunicorn arguments
+CMD exec gunicorn greenweb.wsgi --bind ${GUNICORN_BIND_IP:-0.0.0.0}:${PORT:-9000} --config gunicorn.conf.py $GUNICORN_CMD_ARGS
diff --git a/compose.yaml b/compose.yaml
index 1e2e06d3..28ff8b77 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -35,3 +35,4 @@ services:
     restart: always
     depends_on:
       - db
+      - rabbitmq
diff --git a/docs/deployment.md b/docs/deployment.md
index d16ba93b..5550da57 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -256,6 +256,4 @@ As mentioned before, we use systemd to run our both our workers and web server p
 
 ### Gunicorn logging
 
-By default, gunicorn, our web server logs at the `INFO` level. This means successful requests are not logged, and only errors (with the status code 5xx) or not found requests (4xx)  show up in logs.
-
 The logs on each app server are sent to our the Loki server on our monitoring node, accessible at https://grafana.greenweb.org. This allow for centralised querying of logs.
diff --git a/docs/installation.md b/docs/installation.md
index 13e456a6..7ee837ec 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -6,6 +6,7 @@ In order to setup this project, we recommend following the guides from top to bo
 
 ---
 ## Development on a remote machine using Gitpod
+
 ### Why Gitpod
 This is the main supported approach for setting up a development environment. You can use other approaches, but we may not be able to provide as much support. Gitpod will spin and configure a development environment for you, to which you can connect either from your IDE or from the browser. By choosing this approach you don't have to manually install any dependencies.
  
@@ -20,13 +21,14 @@ In an isolated [virtual environment](https://docs.python.org/3/tutorial/venv.htm
 
 ### Prerequisites
 If you decide to go with this approach, you need to make sure you have the system dependencies installed. Use this command (if your OS uses `apt`) or equivalent for your operating system:
+
 ```
 sudo apt install python3 python3-dev build-essential libmariadb3 libmariadb-dev
 ```
 
 __Note__ In the context of development, it is recommended to manage Python versions using [`pyenv`](https://github.com/pyenv/pyenv) instead of relying on the version shipped in the operating system.
 
-__Note__ Currently Python version 3.11.9 is used on production.
+__Note__ Currently Python version 3.11.9 is used in production.
 
 ### Setup
 Before following the following list, make sure you are in the root directory (workspace/admin-portal).
@@ -51,3 +53,7 @@ Will run my-command with all the environment variables in .env set.
 ## Working with email
 
 This project has features that send email notifications to users. To test email functionality, this project uses [Mailhog](https://github.com/mailhog/MailHog). It's enabled by default in Gitpod environments, and you can access it on port 8025.
+
+## Working with Docker
+
+If you prefer working with docker, there are instructions for spinning up a local environment with `docker compose` and building docker images. See [working with docker](working-with-docker.md) for more.
diff --git a/docs/working-with-docker.md b/docs/working-with-docker.md
new file mode 100644
index 00000000..248c6146
--- /dev/null
+++ b/docs/working-with-docker.md
@@ -0,0 +1,110 @@
+# Working with Docker
+
+We use Docker to create packaged versions of the platform for use where a dockerised version of this code base is expected.
+
+The key scenarios where we would use this are:
+
+- in local development with docker compose
+- where running code is assumed to be ephemeral, like a 'serverless' service (for example, AWS Lambda, Fly.io, Scaleway Serverless Containers, Google Cloud Run, etc.)
+- in tooling used to understand the resource usage of the platform, like Green Coding Solution's Green Metrics Tool (GMT)
+
+
+### How to use Docker and Docker Compose for development
+
+First, make sure you have docker or docker-compatible software installed (Orbstack on MacOS is a good example of the latter).
+
+The Green Web Platform is comprised of three main services
+
+- a WSGI django application: served by the Gunicorn webserver
+- a message queue: RabbitMQ
+- a database: MariaDB
+
+This topology is represented in our Gitpod development environment, but also the Docker Compose file, `compose.yaml`, which is consumed by the Green Metrics Tool for testing runs.
+
+#### Running a local version of the full system with docker compose
+
+You can spin up a local version of the setup above with Docker Compose by checking you are in the project root directory, and calling:
+
+```shell
+# build the images locally, fetching the other images where needed
+docker compose build
+# run the services and main django application
+docker compose up
+```
+
+This will download the various images needed, then start them as separate docker containers. By default, the project's `./apps` directory (containing most of the django code) and the `./greenweb` directory are mounted into the running django container, allowing you to make changes.
+
+Similarly, an `.env.docker` file is used to provide the environment variables that would be present in production, or in other development environments. See `.env.docker.sample` for an annotated list of the expected environment variables.
+
+```yml
+# abridged file - see compose.yaml for more details
+django:
+    env_file:
+    - path: ./.env.docker
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: greenweb-app
+    image: greenweb-app
+    expose:
+      - 9000
+    ports:
+      - 9000:9000
+    volumes:
+      - ./apps:/app/apps
+      - ./greenweb:/app/greenweb
+    restart: always
+    depends_on:
+      - db
+      - rabbitmq
+```
+
+#### Running a local version of the django app with docker
+
+The green web platform is designed so that the different parts can be run on different servers, in various configurations.
+
+To run just the django application, once the container is built, you can run it like so:
+
+```
+docker run --env-file .env.docker -it greenweb-app bash
+```
+
+This will log you into the running container, where you can run `gunicorn` to serve web requests, and put greenchecks onto a message queue for looking up:
+
+```
+gunicorn --bind 0.0.0.0:9000 --config gunicorn.conf.py greenweb.wsgi
+```
+
+
+Or run the `dramatiq` workers that take domains off the message queue, look them up, and write the results to the lookup table:
+
+```shell
+# run dramatiq, with one thread and one process, listening on all queues
+python ./manage.py rundramatiq --threads 1 --processes 1
+```
+
+###  Making new images
+
+If you are using docker you will at some point need to make new images.
+
+Running long commands in the terminal gets tedious, so see the `scripts/build_containers.sh` script for an annotated example of building a docker image.
+
+There is a Makefile task intended for automating the process of creating builds - see `docker.build`
+
+
+
+### Publish an image
+
+Once you have an image built, to use it outside a development environment it needs to be accessible over the internet. At a high level the steps are:
+
+1. build the image
+1. tag the image
+1. push the image to a registry
+
+There is a Makefile task set up for this - see `docker.release` - this tags and pushes a built image to a docker repository hosted by Scaleway. You will need to be authenticated first. 
+
+```shell 
+docker login <SCALEWAY_DOCKER_REGISTRY>/<GREENWEB_NAMESPACE> -u nologin
+```
+
+You will be prompted for a password - if you don't have access to these credentials, please contact one of the green web staff.
diff --git a/makefile b/makefile
index 91921d17..c33f46a2 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,8 @@
 .PHONY: release venv
 
+# used for tagging our docker images: "<$APP_RELEASE from the shell environment>-<abbreviated hash of the latest commit>"
+TAG ?= $(shell echo $$APP_RELEASE)-$(shell git log -n 1 --format=%h)
+
 # Create Python virtual environment if not yet created.
 venv:
 	test -d .venv || python -m venv .venv
@@ -58,3 +61,12 @@ docs:
 # Build the documentation using Sphinx and keep updating it on every change
 docs.watch:
 	dotenv run -- sphinx-autobuild ./docs _build/
+
+# docker.build makes an image from the project root; docker.release tags it for our registry and pushes it
+.PHONY: docker.build docker.release
+docker.build:
+	docker build -t $(APP_NAME) .
+
+docker.release:
+	docker tag $(APP_NAME) $(DOCKER_REGISTRY)/$(APP_NAME):$(TAG)
+	docker push $(DOCKER_REGISTRY)/$(APP_NAME):$(TAG)
diff --git a/scripts/build_containers.sh b/scripts/build_containers.sh
new file mode 100755
index 00000000..8426464d
--- /dev/null
+++ b/scripts/build_containers.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# this is a convenience script for iterating on container builds.
+# most of the time we would use the makefile docker.build task, but
+# this is intended for debugging, and logging into containers
+# when inspecting their contents
+
+set -euo pipefail
+
+# build our image, tagged greenweb-app, using the current directory as the build context
+docker build . --tag greenweb-app
+
+# uncomment to log into a container started from the newly built image
+# docker run  --env-file .env.docker  --interactive --tty greenweb-app bash
diff --git a/scripts/install_package.sh b/scripts/install_package.sh
new file mode 100644
index 00000000..e3ed1473
--- /dev/null
+++ b/scripts/install_package.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# This script is intended to be run inside a Debian or Ubuntu container
+# to install a package (syslog-ng is used below as the example).
+
+# Annotated to explain common good practices to keep image sizes small
+
+# Bash "strict mode", to help catch problems and bugs in the shell
+# script. Every bash script you write should include this. See
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/ for
+# details.
+set -euo pipefail
+
+# Tell apt-get we're never going to be able to give manual
+# feedback:
+export DEBIAN_FRONTEND=noninteractive
+
+# Update the package listing, so we know what packages exist:
+apt-get update
+
+# Install security updates:
+apt-get -y upgrade
+
+# Install a new package, without unnecessary recommended packages:
+apt-get -y install --no-install-recommends syslog-ng
+
+# Delete cached files we don't need anymore (note that if you're
+# using official Docker images for Debian or Ubuntu, this happens
+# automatically, you don't need to do it yourself):
+apt-get clean
+
+# Delete index files we don't need anymore:
+rm -rf /var/lib/apt/lists/*