Move model download from the Docker image build step to the run script #19

Merged · 8 commits · Aug 21, 2023
Changes from 4 commits
26 changes: 0 additions & 26 deletions api/13B.Dockerfile

This file was deleted.

26 changes: 0 additions & 26 deletions api/70B.Dockerfile

This file was deleted.

11 changes: 2 additions & 9 deletions api/Dockerfile
@@ -1,20 +1,13 @@
 # Define the image argument and provide a default value
 ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest

-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-7b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin
-
 FROM ${IMAGE}

-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
+# Install curl and create a directory for the models
 RUN apt-get update -y && \
     apt-get install --yes curl && \
     mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
+    make build

 WORKDIR /app

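With the download and the MODEL_FILE/MODEL_DOWNLOAD_URL build arguments gone, a single image serves every model size; which weights to fetch is now decided at run time by the container environment. A rough sketch of exercising this by hand (the image tag llama-gpt-api is illustrative, and the URL is simply the 7B default used elsewhere in this PR):

    docker build -t llama-gpt-api ./api
    docker run -v "$(pwd)/models:/models" \
      -e MODEL=/models/llama-2-7b-chat.bin \
      -e MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin \
      llama-gpt-api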
20 changes: 19 additions & 1 deletion api/run.sh
@@ -1,6 +1,24 @@
 #!/bin/bash

-make build
+if [ -z "$MODEL" ]
+then
+    echo "Please set the MODEL environment variable"
+    exit 1
+fi
+
+if [ -z "$MODEL_DOWNLOAD_URL" ]
+then
+    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
+    exit 1
+fi
+
+if [ ! -f "$MODEL" ]; then
+    echo "Model file not found. Downloading..."
+    curl -L -o "$MODEL" "$MODEL_DOWNLOAD_URL"
+else
+    echo "$MODEL model found."
+fi

 # Get the number of available threads on the system
 n_threads=$(grep -c ^processor /proc/cpuinfo)
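The script now refuses to start unless both variables are set, downloads the model on the first run, and reuses the file on every later run. A minimal invocation with the 7B defaults from docker-compose.yml (paths assume the container's /models mount):

    export MODEL=/models/llama-2-7b-chat.bin
    export MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin
    ./run.sh   # first run: "Model file not found. Downloading..."; later runs: "... model found."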
5 changes: 4 additions & 1 deletion docker-compose-13b.yml
@@ -5,11 +5,14 @@ services:
     # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
     build:
       context: ./api
-      dockerfile: 13B.Dockerfile
+      dockerfile: Dockerfile
     restart: on-failure
+    volumes:
+      - './models:/models'
     environment:
       MODEL: '/models/llama-2-13b-chat.bin'
       USE_MLOCK: 1
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin'
     cap_add:
       - IPC_LOCK

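Because the weights now land in the bind-mounted ./models directory instead of an image layer, rebuilding the image no longer re-downloads them. A quick way to confirm with the 13B stack (the file name is taken from the MODEL value above):

    docker compose -f docker-compose-13b.yml up --build -d
    # once run.sh finishes its first download, the file persists on the host:
    ls -lh ./models/llama-2-13b-chat.bin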
5 changes: 4 additions & 1 deletion docker-compose-70b.yml
@@ -5,10 +5,13 @@ services:
     # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
     build:
       context: ./api
-      dockerfile: 70B.Dockerfile
+      dockerfile: Dockerfile
     restart: on-failure
+    volumes:
+      - './models:/models'
     environment:
       MODEL: '/models/llama-2-70b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin'
       # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
       # it's not possible to change this using --n_gqa with llama-cpp-python in
       # run.sh, so we expose it as an environment variable.
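Since run.sh cannot pass --n_gqa on the command line, the grouping factor has to reach llama-cpp-python through the container environment. A hedged sketch of a one-off override (the service name llama-gpt-api is an assumption, as it is not visible in this excerpt, and the server is assumed to read N_GQA from its environment at startup):

    # assumption: the API service is named llama-gpt-api and picks up N_GQA from its environment
    docker compose -f docker-compose-70b.yml run -e N_GQA=8 llama-gpt-api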
3 changes: 3 additions & 0 deletions docker-compose.yml
@@ -7,8 +7,11 @@ services:
       context: ./api
       dockerfile: Dockerfile
     restart: on-failure
+    volumes:
+      - './models:/models'
     environment:
       MODEL: '/models/llama-2-7b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'

   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
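For the default 7B stack nothing beyond a normal compose start is needed; the download guard added to run.sh makes first and later starts easy to tell apart in the logs:

    docker compose up --build
    # first start: run.sh prints "Model file not found. Downloading..." and fetches the 7B weights
    # later starts: it prints "/models/llama-2-7b-chat.bin model found." and launches immediately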