diff --git a/api/13B.Dockerfile b/api/13B.Dockerfile
deleted file mode 100644
index 474d77f..0000000
--- a/api/13B.Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Define the image argument and provide a default value
-ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
-
-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-13b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin
-
-FROM ${IMAGE}
-
-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
-RUN apt-get update -y && \
-    apt-get install --yes curl && \
-    mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
-
-WORKDIR /app
-
-COPY . .
-
-EXPOSE 8000
-
-# Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/70B.Dockerfile b/api/70B.Dockerfile
deleted file mode 100644
index 036797b..0000000
--- a/api/70B.Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Define the image argument and provide a default value
-ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
-
-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-70b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin
-
-FROM ${IMAGE}
-
-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
-RUN apt-get update -y && \
-    apt-get install --yes curl && \
-    mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
-
-WORKDIR /app
-
-COPY . .
-
-EXPOSE 8000
-
-# Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/Dockerfile b/api/Dockerfile
deleted file mode 100644
index 731a2b8..0000000
--- a/api/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Define the image argument and provide a default value
-ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
-
-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-7b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin
-
-FROM ${IMAGE}
-
-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
-RUN apt-get update -y && \
-    apt-get install --yes curl && \
-    mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
-
-WORKDIR /app
-
-COPY . .
-
-EXPOSE 8000
-
-# Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/run.sh b/api/run.sh
index 38d8118..b2aeb80 100755
--- a/api/run.sh
+++ b/api/run.sh
@@ -1,7 +1,34 @@
 #!/bin/bash
 
-make build
+if [ -z "$MODEL" ]
+then
+    echo "Please set the MODEL environment variable"
+    exit 1
+fi
+
+if [ -z "$MODEL_DOWNLOAD_URL" ]
+then
+    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
+    exit 1
+fi
+
+# Check if curl is installed
+if ! [ -x "$(command -v curl)" ]; then
+    echo "curl is not installed. Installing..."
+    apt-get update --yes --quiet
+    apt-get install --yes --quiet curl
+fi
+
+# Download the model file if it is not already present
+if [ ! -f "$MODEL" ]; then
+    echo "Model file not found. Downloading..."
+    curl -L -o "$MODEL" "$MODEL_DOWNLOAD_URL"
+else
+    echo "$MODEL model found."
+fi
+
+make build
 
 # Get the number of available threads on the system
 n_threads=$(grep -c ^processor /proc/cpuinfo)
 
diff --git a/docker-compose-13b.yml b/docker-compose-13b.yml
index 20e1f7b..108d361 100644
--- a/docker-compose-13b.yml
+++ b/docker-compose-13b.yml
@@ -2,16 +2,18 @@ version: '3.6'
 
 services:
   llama-gpt-api-13b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 13B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-13b-chat.bin'
       USE_MLOCK: 1
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin'
     cap_add:
       - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
diff --git a/docker-compose-70b.yml b/docker-compose-70b.yml
index 91fa5ab..2643220 100644
--- a/docker-compose-70b.yml
+++ b/docker-compose-70b.yml
@@ -2,13 +2,14 @@ version: '3.6'
 
 services:
   llama-gpt-api-70b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 70B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-70b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin'
       # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
       # it's not possible to change this using --n_gqa with llama-cpp-python in
       # run.sh, so we expose it as an environment variable.
@@ -18,6 +19,7 @@ services:
       USE_MLOCK: 1
     cap_add:
       - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
diff --git a/docker-compose.yml b/docker-compose.yml
index b62be47..81feccb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,13 +2,15 @@ version: '3.6'
 
 services:
   llama-gpt-api-7b:
-    # image: ghcr.io/getumbrel/llama-gpt-api
-    build:
-      context: ./api
-      dockerfile: Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-7b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
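
A minimal smoke test of the download-on-start flow introduced by this diff, sketched under the assumption of a stock Docker Compose setup; it is not part of the change itself. The compose file name, service name, ./models mount, and log message come from the diff; everything else is ordinary Compose usage.

# Bring up the 13B stack; on first start run.sh fetches the model into the
# host-mounted ./models directory, so later starts reuse the cached file.
docker compose -f docker-compose-13b.yml up -d

# Expect "Model file not found. Downloading..." on the first run.
docker compose -f docker-compose-13b.yml logs -f llama-gpt-api-13b

# The .bin file should appear here once the download finishes.
ls -lh ./models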