Move models out of the Docker image to a volume (#19)
edgar971 authored Aug 21, 2023
1 parent aa8b266 commit 4dc2369
Showing 7 changed files with 46 additions and 91 deletions.
26 changes: 0 additions & 26 deletions api/13B.Dockerfile

This file was deleted.

26 changes: 0 additions & 26 deletions api/70B.Dockerfile

This file was deleted.

26 changes: 0 additions & 26 deletions api/Dockerfile

This file was deleted.

29 changes: 28 additions & 1 deletion api/run.sh
@@ -1,7 +1,34 @@
 #!/bin/bash
 
-make build
+if [ -z "$MODEL" ]
+then
+    echo "Please set the MODEL environment variable"
+    exit 1
+fi
+
+if [ -z "$MODEL_DOWNLOAD_URL" ]
+then
+    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
+    exit 1
+fi
+
+# check if curl is installed
+
+if ! [ -x "$(command -v curl)" ]; then
+    echo "curl is not installed. Installing..."
+    apt-get update --yes --quiet
+    apt-get install --yes --quiet curl
+fi
+
+if [ ! -f "$MODEL" ]; then
+    echo "Model file not found. Downloading..."
+    curl -L -o "$MODEL" "$MODEL_DOWNLOAD_URL"
+else
+    echo "$MODEL model found."
+fi
+
+make build
 # Get the number of available threads on the system
 n_threads=$(grep -c ^processor /proc/cpuinfo)

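With this change, run.sh validates its inputs and fetches the model into the mounted volume on first start, instead of relying on weights baked into the image. A minimal sketch of exercising the new flow by hand, assuming you are inside the API container; the MODEL path and MODEL_DOWNLOAD_URL below are placeholders, not values from this commit:

# Hypothetical smoke test for the download-on-first-run logic.
# Both values are placeholders; substitute a real model path and URL.
export MODEL=/models/example-model.bin
export MODEL_DOWNLOAD_URL=https://example.com/example-model.ggmlv3.q4_0.bin
sh /api/run.sh    # downloads on the first run, then continues into make build

On a second invocation the -f test sees the existing file, prints "model found.", and skips the download, so the multi-gigabyte fetch happens at most once per volume.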
10 changes: 6 additions & 4 deletions docker-compose-13b.yml
@@ -2,16 +2,18 @@ version: '3.6'
 
 services:
   llama-gpt-api-13b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 13B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-13b-chat.bin'
       USE_MLOCK: 1
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin'
     cap_add:
      - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
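Because the weights now live in the ./models bind mount rather than inside the image, the download survives container recreation. A quick way to confirm that, assuming Compose v2 and that this file is saved as docker-compose-13b.yml:

docker compose -f docker-compose-13b.yml up -d    # first start: run.sh downloads the model
ls -lh ./models                                   # the .bin file appears on the host
docker compose -f docker-compose-13b.yml down
docker compose -f docker-compose-13b.yml up -d    # restart: run.sh finds the file, no re-download
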
10 changes: 6 additions & 4 deletions docker-compose-70b.yml
@@ -2,13 +2,14 @@ version: '3.6'
 
 services:
   llama-gpt-api-70b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 70B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-70b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin'
       # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
       # it's not possible to change this using --n_gqa with llama-cpp-python in
       # run.sh, so we expose it as an environment variable.
@@ -18,6 +19,7 @@ services:
       USE_MLOCK: 1
     cap_add:
       - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
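The retained comment is why N_GQA stays in the environment block: llama-cpp-python cannot be given --n_gqa through run.sh here, and Llama 2 70B needs a grouping factor of 8. To experiment with that value without editing the file, a one-off override is possible; this is a sketch assuming Compose v2, with the service name taken from the file above:

docker compose -f docker-compose-70b.yml run --rm -e N_GQA=8 llama-gpt-api-70b
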
10 changes: 6 additions & 4 deletions docker-compose.yml
@@ -2,13 +2,15 @@ version: '3.6'
 
 services:
   llama-gpt-api-7b:
-    # image: ghcr.io/getumbrel/llama-gpt-api
-    build:
-      context: ./api
-      dockerfile: Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-7b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
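With the build: block gone, a plain docker compose up pulls the prebuilt llama-cpp-python image, and the first boot downloads the 7B weights into ./models. Once the API container is up, a request like the following can confirm it is serving; the host port mapping is not visible in this diff, so 3001 here is an assumption:

docker compose up -d
curl http://localhost:3001/v1/models    # llama-cpp-python serves an OpenAI-compatible API; port assumed
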
