From a888b9ab783b4c85fcc690e2a6a963b3027924c1 Mon Sep 17 00:00:00 2001 From: Chris Adams Date: Wed, 8 May 2024 22:45:22 +0200 Subject: [PATCH] Switch from supervisor to systemd for process mgmt --- ansible/_add_system_dependencies.yml | 1 - ansible/_set_up_process_mgmt.yml | 16 ++-- ansible/deploy-workers.yml | 75 ++++++++++-------- ansible/deploy.yml | 19 ++--- ansible/inventories/prod.yml | 6 +- ansible/inventories/staging.yml | 4 +- ansible/scale_processes.yml | 12 +-- ansible/templates/nginx.conf.j2 | 79 ------------------- ansible/templates/run_gunicorn.sh.j2 | 13 +-- ansible/templates/run_worker.sh.j2 | 13 +-- ansible/templates/supervisor.gunicorn.conf.j2 | 16 ---- ansible/templates/supervisor.worker.conf.j2 | 15 ---- ansible/templates/systemd.web-app.service.j2 | 21 +++++ ansible/templates/systemd.worker.service.j2 | 21 +++++ docs/deployment.md | 10 +-- docs/how-to.md | 2 +- makefile | 2 +- 17 files changed, 136 insertions(+), 189 deletions(-) delete mode 100644 ansible/templates/nginx.conf.j2 delete mode 100644 ansible/templates/supervisor.gunicorn.conf.j2 delete mode 100644 ansible/templates/supervisor.worker.conf.j2 create mode 100644 ansible/templates/systemd.web-app.service.j2 create mode 100644 ansible/templates/systemd.worker.service.j2 diff --git a/ansible/_add_system_dependencies.yml b/ansible/_add_system_dependencies.yml index 3bfa51ca..94167589 100644 --- a/ansible/_add_system_dependencies.yml +++ b/ansible/_add_system_dependencies.yml @@ -64,7 +64,6 @@ - python3.11-venv - python3.11-dev - python-is-python3 - # - supervisor state: present update_cache: true become: true diff --git a/ansible/_set_up_process_mgmt.yml b/ansible/_set_up_process_mgmt.yml index 93eec55d..49d9470c 100644 --- a/ansible/_set_up_process_mgmt.yml +++ b/ansible/_set_up_process_mgmt.yml @@ -1,7 +1,7 @@ --- # shared steps for provisioning boxes, deploying and controlling # how web workers and queue workers are scaled -- name: Set up script for running workers and gunicorn, via 
supervisor in project +- name: Set up script for running workers and web app, via systemd in project ansible.builtin.template: src: "{{ item.src }}" dest: "{{ item.dest }}" @@ -19,10 +19,10 @@ } become: true tags: - - supervisor + - systemd - config -- name: Set up supervisor entries for workers and web +- name: Set up systemd entries for workers and web app ansible.builtin.template: src: "{{ item.src }}" dest: "{{ item.dest }}" @@ -31,14 +31,14 @@ mode: "0755" loop: - { - src: "supervisor.gunicorn.conf.j2", - dest: "/etc/supervisor/conf.d/{{ supervisor_gunicorn_app }}.conf", + src: "systemd.web-app.service.j2", + dest: "/etc/systemd/system/{{ service_gunicorn_app }}.service", } - { - src: "supervisor.worker.conf.j2", - dest: "/etc/supervisor/conf.d/{{ supervisor_worker_job }}.conf", + src: "systemd.worker.service.j2", + dest: "/etc/systemd/system/{{ service_worker_job }}.service", } become: true tags: - - supervisor + - systemd - config diff --git a/ansible/deploy-workers.yml b/ansible/deploy-workers.yml index ce6c3672..0f673af7 100644 --- a/ansible/deploy-workers.yml +++ b/ansible/deploy-workers.yml @@ -26,12 +26,11 @@ # handled in source control, but if the front end will not build in deployment, this # is a workaround update_front_end_deps: true - # For triggering restarts with supervisor - supervisor_restart: true - supervisor_user: "deploy" - supervisor_gunicorn_app: "admin_web" - supervisor_worker_job: "admin_worker" - + # For triggering restarts with systemd + service_restart: true + service_user: "deploy" + service_gunicorn_app: "admin_web" + service_worker_job: "admin_worker" # See "dramatiq_threads" and "dramatiq_processes" # in the inventory @@ -53,10 +52,9 @@ - name: Assemble assets and related files ansible.builtin.include_tasks: "_assemble_deploy_assets.yml" - when: - compile_assets is true + when: compile_assets is true - - name: Set up script for running workers and gunicorn, via supervisor + - name: Set up script for running workers and gunicorn, 
via systemd ansible.builtin.template: src: "{{ item.src }}" dest: "{{ item.dest }}" @@ -64,31 +62,42 @@ group: deploy mode: "0755" loop: - - { src: "run_worker.sh.j2", dest: "{{ project_root }}/current/run_worker.sh" } - - { src: "run_gunicorn.sh.j2", dest: "{{ project_root }}/current/run_gunicorn.sh" } + - { + src: "run_worker.sh.j2", + dest: "{{ project_root }}/current/run_worker.sh", + } + - { + src: "run_gunicorn.sh.j2", + dest: "{{ project_root }}/current/run_gunicorn.sh", + } become: true tags: - - supervisor + - systemd +- name: Set up systemd entries for workers and web app + ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: deploy + group: deploy + mode: "0755" + loop: + - { + src: "systemd.web-app.service.j2", + dest: "/etc/systemd/system/{{ service_gunicorn_app }}.service", + } + - { + src: "systemd.worker.service.j2", + dest: "/etc/systemd/system/{{ service_worker_job }}.service", + } + become: true + tags: + - systemd + - config - - name: Set up supervisor entries for workers and web - ansible.builtin.template: - src: "{{ item.src }}" - dest: "{{ item.dest }}" - owner: deploy - group: deploy - mode: "0755" - loop: - - { src: "supervisor.gunicorn.conf.j2", dest: "/etc/supervisor/conf.d/{{ tgwf_domain_name }}_web.conf" } - - { src: "supervisor.worker.conf.j2", dest: "/etc/supervisor/conf.d/{{ tgwf_domain_name }}_worker.conf" } - become: true - tags: - - supervisor - - - - name: Trigger restart for worker with supervisor - ansible.builtin.supervisorctl: - name: "{{ supervisor_worker_job }}:" - state: restarted - become: true - when: supervisor_restart is true +- name: Trigger restart for worker with systemd + ansible.builtin.service: + name: "{{ service_worker_job }}" + state: restarted + become: true + when: service_restart is true diff --git a/ansible/deploy.yml b/ansible/deploy.yml index 5d009c43..6ad2774c 100644 --- a/ansible/deploy.yml +++ b/ansible/deploy.yml @@ -32,8 +32,8 @@ # create new dotenv file in shared
directory (use when adding new env vars) update_dotenv: false - supervisor_restart: true - supervisor_user: "deploy" + service_restart: true + service_user: "deploy" # flag for deciding to compile assets or not # this is the slowest deploy step @@ -67,19 +67,20 @@ when: update_dotenv is true tags: [dotenv] - - name: Set up process management with supervisor + - name: Set up process management with systemd ansible.builtin.include_tasks: "_set_up_process_mgmt.yml" tags: - - supervisor + - systemd + - systemd - - name: Trigger restart for app with supervisor - ansible.builtin.supervisorctl: - name: "{{ supervisor_gunicorn_app }}" + - name: Trigger restart for app with systemd + ansible.builtin.service: + name: "{{ service_gunicorn_app }}" state: restarted become: true - when: supervisor_restart is true + when: service_restart is true tags: - - supervisor + - systemd - name: Reload nginx ansible.builtin.service: diff --git a/ansible/inventories/prod.yml b/ansible/inventories/prod.yml index acdaa5a5..8f7896b0 100644 --- a/ansible/inventories/prod.yml +++ b/ansible/inventories/prod.yml @@ -43,9 +43,9 @@ all: project_root: "/var/www/{{ tgwf_domain_name }}.thegreenwebfoundation.org" project_deploy_branch: "master" ansible_user: "deploy" - supervisor_user: "deploy" - supervisor_gunicorn_app: "web_{{ tgwf_stage }}" - supervisor_worker_job: "worker_{{ tgwf_stage }}" + service_user: "deploy" + service_gunicorn_app: "web_{{ tgwf_stage }}" + service_worker_job: "worker_{{ tgwf_stage }}" gunicorn_port: 9000 # you can set child groups too diff --git a/ansible/inventories/staging.yml b/ansible/inventories/staging.yml index 47a930c9..6cdc8545 100644 --- a/ansible/inventories/staging.yml +++ b/ansible/inventories/staging.yml @@ -16,8 +16,8 @@ all: project_root: "/var/www/{{ tgwf_domain_name }}.thegreenwebfoundation.org" project_deploy_branch: "staging" ansible_user: "deploy" - supervisor_gunicorn_app: "web_{{ tgwf_stage }}" - supervisor_worker_job: "worker_{{ tgwf_stage }}" + 
service_gunicorn_app: "web_{{ tgwf_stage }}" + service_worker_job: "worker_{{ tgwf_stage }}" # you can set child groups too children: diff --git a/ansible/scale_processes.yml b/ansible/scale_processes.yml index f23068d8..0ed9f0fe 100644 --- a/ansible/scale_processes.yml +++ b/ansible/scale_processes.yml @@ -14,15 +14,15 @@ preferred_state: started tasks: - - name: Set up process management with supervisor + - name: Set up process management with systemd ansible.builtin.include_tasks: "_set_up_process_mgmt.yml" tags: - - supervisor + - systemd - - name: Update django apps using supervisor - ansible.builtin.supervisorctl: - name: "{{ supervisor_gunicorn_app }}:" + - name: Update django apps using systemd + ansible.builtin.service: + name: "{{ service_gunicorn_app }}" state: "{{ preferred_state }}" become: true tags: - - supervisor + - systemd diff --git a/ansible/templates/nginx.conf.j2 b/ansible/templates/nginx.conf.j2 deleted file mode 100644 index 933c81f6..00000000 --- a/ansible/templates/nginx.conf.j2 +++ /dev/null @@ -1,79 +0,0 @@ -# {{ ansible_managed }} -# Last run: {{ template_run_date }} - - -upstream {{ tgwf_domain_name }} { - # replace with loop, once we have a nice way to keep supervisor and - # nginx in sync, so we only forward to healthy app servers - - # hel1 - # server localhost:9000; - # app1 - server 10.0.0.4:9000; - # app2 - server 10.0.0.5:9000; -} - -server { - listen 443 ssl; # managed by Certbot - server_name {{ tgwf_domain_name }}.thegreenwebfoundation.org; - - access_log /var/log/nginx/{{ tgwf_domain_name }}.thegreenwebfoundation.org-access.log; - error_log /var/log/nginx/{{ tgwf_domain_name }}.thegreenwebfoundation.org-error.log; - - ssl_certificate /etc/letsencrypt/live/admin.thegreenwebfoundation.org/fullchain.pem; # managed by Certbot - ssl_certificate_key /etc/letsencrypt/live/admin.thegreenwebfoundation.org/privkey.pem; # managed by Certbot - - include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot - ssl_dhparam
/etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot - - keepalive_timeout 5; - - # for batch API checks we can end up with large files being - # uploaded/downloaded. Adding this gives support, and stops - # the Nginx 413 Request Entity Too Large error - client_max_body_size 128M; - - location = /favicon.ico { - access_log off; - log_not_found off; - } - - location @proxy_to_app { - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Host $host; - - # we need this for when we served requests to app servers on different - # machines to the server acting as the reverse proxy - proxy_redirect redirect replacement; - proxy_pass http://{{ tgwf_domain_name }}; - } - - location / { - # make sure only static files we choose to expose are reachable, then fallback to - # django serving the request - root "{{ project_root }}/current/staticfiles"; - try_files $uri @proxy_to_app; - } - - location /media/ { - # make sure only static files we choose to expose are reachable, then fallback to - # django serving the request - root "{{ project_root }}/shared/media/"; - try_files $uri @proxy_to_app; - } - -} - -server { - if ($host = {{ tgwf_domain_name }}.thegreenwebfoundation.org) { - return 301 https://$host$request_uri; - } # managed by Certbot - - - listen {{ ansible_default_ipv4.address }}:80; - server_name {{ tgwf_domain_name }}.thegreenwebfoundation.org; - return 404; # managed by Certbot -} - diff --git a/ansible/templates/run_gunicorn.sh.j2 b/ansible/templates/run_gunicorn.sh.j2 index 128a6c15..d8cb11ec 100644 --- a/ansible/templates/run_gunicorn.sh.j2 +++ b/ansible/templates/run_gunicorn.sh.j2 @@ -1,8 +1,11 @@ -# supervisor can only control processes it started itself. -# So we need to use exec to replace the parent shell script process -# that calls gunicorn -source .venv/bin/activate -exec dotenv run -- gunicorn greenweb.wsgi \ +#! 
/usr/bin/bash +# {{ ansible_managed }} +# Last run: {{ template_run_date }} + + +# `exec` replaces this shell with gunicorn, so systemd's stop signal (SIGTERM, not KILL) is delivered +# directly to gunicorn when stopping or restarting, allowing for a graceful shutdown or reboot +exec {{ project_root }}/current/.venv/bin/gunicorn greenweb.wsgi \ --bind {{ internal_ip }}:{{ gunicorn_port }} \ --timeout 300 \ --config gunicorn.conf.py \ diff --git a/ansible/templates/run_worker.sh.j2 b/ansible/templates/run_worker.sh.j2 index d8cfe365..727b24af 100644 --- a/ansible/templates/run_worker.sh.j2 +++ b/ansible/templates/run_worker.sh.j2 @@ -1,8 +1,11 @@ -# supervisor can only control processes it started itself. -# So we need to use exec to replace the parent shell script process -# that calls manage.py -source .venv/bin/activate -exec dotenv run -- ./manage.py rundramatiq \ +#! /usr/bin/bash + +# {{ ansible_managed }} +# Last run: {{ template_run_date }} + +# `exec` replaces this shell with dramatiq, so systemd's stop signal (SIGTERM, not KILL) is delivered +# directly to dramatiq when stopping or restarting, allowing for a graceful shutdown or reboot +exec {{ project_root }}/current/.venv/bin/python ./manage.py rundramatiq \ --threads {{ dramatiq_threads }} \ --processes {{ dramatiq_processes }} \ --queues default diff --git a/ansible/templates/supervisor.gunicorn.conf.j2 b/ansible/templates/supervisor.gunicorn.conf.j2 deleted file mode 100644 index 27ab228f..00000000 --- a/ansible/templates/supervisor.gunicorn.conf.j2 +++ /dev/null @@ -1,16 +0,0 @@ -# {{ ansible_managed }} -# Last run: {{ template_run_date }} - -[supervisord] -environment=LC_ALL='en_US.UTF-8',LANG='en_US.UTF-8' - -[program:{{ supervisor_gunicorn_app }}] -directory=/var/www/{{ tgwf_domain_name }}.thegreenwebfoundation.org/current/ -numprocs=1 -command=bash ./run_gunicorn.sh -autostart=true -autorestart=true -stopsignal=QUIT -user={{ supervisor_user }} -stdout_logfile=/var/log/supervisor/%(program_name)s_%(process_num)02d.log
-stderr_logfile=/var/log/supervisor/%(program_name)s_%(process_num)02d.error.log diff --git a/ansible/templates/supervisor.worker.conf.j2 b/ansible/templates/supervisor.worker.conf.j2 deleted file mode 100644 index 99a063fd..00000000 --- a/ansible/templates/supervisor.worker.conf.j2 +++ /dev/null @@ -1,15 +0,0 @@ -# {{ ansible_managed }} -# Last run: {{ template_run_date }} - -[supervisord] -environment=LC_ALL='en_US.UTF-8',LANG='en_US.UTF-8' - -[program:{{ supervisor_worker_job }}] -directory=/var/www/{{ tgwf_domain_name }}.thegreenwebfoundation.org/current/ -command=bash ./run_worker.sh -autostart=true -autorestart=true -stopsignal=TERM -user={{ supervisor_user }} -stdout_logfile=/var/log/supervisor/%(program_name)s_%(process_num)02d.log -stderr_logfile=/var/log/supervisor/%(program_name)s_%(process_num)02d.error.log diff --git a/ansible/templates/systemd.web-app.service.j2 b/ansible/templates/systemd.web-app.service.j2 new file mode 100644 index 00000000..80aaeeb9 --- /dev/null +++ b/ansible/templates/systemd.web-app.service.j2 @@ -0,0 +1,21 @@ +# {{ ansible_managed }} +# Last run: {{ template_run_date }} + +[Unit] +Description=Greenweb Web App +Documentation=https://greenweb.readthedocs.io/ +Wants=network-online.target +After=network-online.target + +[Service] +ExecStart={{ project_root}}/current/run_gunicorn.sh +ExecReload=/bin/kill -s HUP $MAINPID +WorkingDirectory={{ project_root}}/current/ +EnvironmentFile={{ project_root}}/current/.env +User={{ service_user }} +Group={{ service_user }} +KillMode=process +KillSignal=SIGTERM +Restart=on-failure +[Install] +WantedBy=multi-user.target diff --git a/ansible/templates/systemd.worker.service.j2 b/ansible/templates/systemd.worker.service.j2 new file mode 100644 index 00000000..3093548b --- /dev/null +++ b/ansible/templates/systemd.worker.service.j2 @@ -0,0 +1,21 @@ +# {{ ansible_managed }} +# Last run: {{ template_run_date }} + +[Unit] +Description=Greenweb Worker +Documentation=https://greenweb.readthedocs.io/ 
+Wants=network-online.target +After=network-online.target + +[Service] +ExecStart={{ project_root}}/current/run_worker.sh +EnvironmentFile={{ project_root}}/current/.env +WorkingDirectory={{ project_root}}/current/ +ExecReload=/bin/kill -s HUP $MAINPID +User={{ service_user }} +Group={{ service_user }} +KillMode=process +KillSignal=SIGTERM +Restart=on-failure +[Install] +WantedBy=multi-user.target diff --git a/docs/deployment.md b/docs/deployment.md index 4a9d755e..1e0f69c2 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -229,19 +229,19 @@ Each new deploy using the `deploy.yml` ansible playbook deploys the version of t If you only want to scale the workers up and down, and don't want to run through the whole deployment process, updating just the processes is possible. -You have two possible options - first pass the `supervisor` tag to the deploy script. This will only run the steps tagged with `supervisor` in the deploy playbook. +You have two possible options - first pass the `systemd` tag to the deploy script. This will only run the steps tagged with `systemd` in the deploy playbook. ``` -ansible-playbook -i ansible/inventories/prod.yml ./ansible/deploy.yml --tags supervisor +ansible-playbook -i ansible/inventories/prod.yml ./ansible/deploy.yml --tags systemd ``` Alternatively, you can run the dedicated `scale-processes.yml` playbook. This includes the same tasks as are defined in the larger `deploy` playbook: ``` -ansible-playbook -i ansible/inventories/prod.yml ./ansible/deploy.yml --tags supervisor +ansible-playbook -i ansible/inventories/prod.yml ./ansible/scale_processes.yml ``` -These playbooks template out new scripts that supervisor the installed process monitors use to run both the gunicorn web servers and dramatiq queue workers, then send a command to update stop, start or restart these processes.
+These playbooks template out new scripts that systemd uses to run both the gunicorn web servers and dramatiq queue workers, then send a command to stop, start or restart these processes. **Further reading** @@ -252,7 +252,7 @@ These playbooks template out new scripts that supervisor the installed process m ### Logging -As mentioned before, we use supervisor to run our both our workers and web server processes. This means processes are restarted automatically for us, and logs are rotated for us. +As mentioned before, we use systemd to run both our workers and web server processes. This means processes are restarted automatically for us, and logs are rotated for us. ### Gunicorn logging diff --git a/docs/how-to.md b/docs/how-to.md index 0f3884b0..411f1667 100644 --- a/docs/how-to.md +++ b/docs/how-to.md @@ -40,7 +40,7 @@ This project also uses libraries to allow the use the handy `aws` cli tool with *Examples* -_(These all assume you have `pipenv shell` to load the necessarty environment variables into your shell)_: +_(These all assume you are running `dotenv run -- $COMMAND` to load the necessary environment variables for that command)_: See the buckets you have access to: diff --git a/makefile b/makefile index 7ea587fd..d71b2046 100644 --- a/makefile +++ b/makefile @@ -2,7 +2,7 @@ # Create Python virtual environment if not yet created. venv: - test -d venv || python3 -m venv venv + test -d venv || python -m venv venv ## Installing release: