Move models out of the Docker image to a volume (#19)
edgar971 authored Aug 21, 2023
1 parent aa8b266 commit 4dc2369
Showing 7 changed files with 46 additions and 91 deletions.
26 changes: 0 additions & 26 deletions api/13B.Dockerfile

This file was deleted.

26 changes: 0 additions & 26 deletions api/70B.Dockerfile

This file was deleted.

26 changes: 0 additions & 26 deletions api/Dockerfile

This file was deleted.

29 changes: 28 additions & 1 deletion api/run.sh
@@ -1,7 +1,34 @@
 #!/bin/bash
 
-make build
+if [ -z "$MODEL" ]
+then
+    echo "Please set the MODEL environment variable"
+    exit 1
+fi
+
+if [ -z "$MODEL_DOWNLOAD_URL" ]
+then
+    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
+    exit 1
+fi
+
+# check if curl is installed
+
+if ! [ -x "$(command -v curl)" ]; then
+    echo "curl is not installed. Installing..."
+    apt-get update --yes --quiet
+    apt-get install --yes --quiet curl
+fi
+
+if [ ! -f "$MODEL" ]; then
+    echo "Model file not found. Downloading..."
+    curl -L -o "$MODEL" "$MODEL_DOWNLOAD_URL"
+else
+    echo "$MODEL model found."
+fi
+
+make build
 # Get the number of available threads on the system
 n_threads=$(grep -c ^processor /proc/cpuinfo)

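With this change, run.sh validates its inputs and fetches the model into the mounted volume on first start, instead of relying on weights baked into the image. A minimal sketch of exercising the new flow by hand, assuming you are inside the API container; the MODEL path and MODEL_DOWNLOAD_URL below are placeholders, not values from this commit:

# Hypothetical smoke test for the download-on-first-run logic.
# Both values are placeholders; substitute a real model path and URL.
export MODEL=/models/example-model.bin
export MODEL_DOWNLOAD_URL=https://example.com/example-model.ggmlv3.q4_0.bin
sh /api/run.sh    # downloads on the first run, then continues into make build

On a second invocation the -f test sees the existing file, prints "model found.", and skips the download, so the multi-gigabyte fetch happens at most once per volume.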
10 changes: 6 additions & 4 deletions docker-compose-13b.yml
@@ -2,16 +2,18 @@ version: '3.6'
 
 services:
   llama-gpt-api-13b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 13B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-13b-chat.bin'
       USE_MLOCK: 1
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin'
     cap_add:
      - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
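Because the weights now live in the ./models bind mount rather than inside the image, the download survives container recreation. A quick way to confirm that, assuming Compose v2 and that this file is saved as docker-compose-13b.yml:

docker compose -f docker-compose-13b.yml up -d    # first start: run.sh downloads the model
ls -lh ./models                                   # the .bin file appears on the host
docker compose -f docker-compose-13b.yml down
docker compose -f docker-compose-13b.yml up -d    # restart: run.sh finds the file, no re-download
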
10 changes: 6 additions & 4 deletions docker-compose-70b.yml
@@ -2,13 +2,14 @@ version: '3.6'
 
 services:
   llama-gpt-api-70b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 70B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-70b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin'
       # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
       # it's not possible to change this using --n_gqa with llama-cpp-python in
       # run.sh, so we expose it as an environment variable.
@@ -18,6 +19,7 @@ services:
       USE_MLOCK: 1
     cap_add:
       - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
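The retained comment is why N_GQA stays in the environment block: llama-cpp-python cannot be given --n_gqa through run.sh here, and Llama 2 70B needs a grouping factor of 8. To experiment with that value without editing the file, a one-off override is possible; this is a sketch assuming Compose v2, with the service name taken from the file above:

docker compose -f docker-compose-70b.yml run --rm -e N_GQA=8 llama-gpt-api-70b
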
10 changes: 6 additions & 4 deletions docker-compose.yml
@@ -2,13 +2,15 @@ version: '3.6'
 
 services:
   llama-gpt-api-7b:
-    # image: ghcr.io/getumbrel/llama-gpt-api
-    build:
-      context: ./api
-      dockerfile: Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-7b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
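With the build: block gone, a plain docker compose up pulls the prebuilt llama-cpp-python image, and the first boot downloads the 7B weights into ./models. Once the API container is up, a request like the following can confirm it is serving; the host port mapping is not visible in this diff, so 3001 here is an assumption:

docker compose up -d
curl http://localhost:3001/v1/models    # llama-cpp-python serves an OpenAI-compatible API; port assumed
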
