diff --git a/api/13B.Dockerfile b/api/13B.Dockerfile
deleted file mode 100644
index 474d77f..0000000
--- a/api/13B.Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Define the image argument and provide a default value
-ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
-
-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-13b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin
-
-FROM ${IMAGE}
-
-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
-RUN apt-get update -y && \
-    apt-get install --yes curl && \
-    mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
-
-WORKDIR /app
-
-COPY . .
-
-EXPOSE 8000
-
-# Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/70B.Dockerfile b/api/70B.Dockerfile
deleted file mode 100644
index 036797b..0000000
--- a/api/70B.Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Define the image argument and provide a default value
-ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
-
-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-70b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin
-
-FROM ${IMAGE}
-
-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
-RUN apt-get update -y && \
-    apt-get install --yes curl && \
-    mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
-
-WORKDIR /app
-
-COPY . .
-
-EXPOSE 8000
-
-# Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/Dockerfile b/api/Dockerfile
deleted file mode 100644
index 731a2b8..0000000
--- a/api/Dockerfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# Define the image argument and provide a default value
-ARG IMAGE=ghcr.io/abetlen/llama-cpp-python:latest
-
-# Define the model file name and download url
-ARG MODEL_FILE=llama-2-7b-chat.bin
-ARG MODEL_DOWNLOAD_URL=https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin
-
-FROM ${IMAGE}
-
-ARG MODEL_FILE
-ARG MODEL_DOWNLOAD_URL
-
-# Download the model file
-RUN apt-get update -y && \
-    apt-get install --yes curl && \
-    mkdir -p /models && \
-    curl -L -o /models/${MODEL_FILE} ${MODEL_DOWNLOAD_URL}
-
-WORKDIR /app
-
-COPY . .
-
-EXPOSE 8000
-
-# Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
\ No newline at end of file
diff --git a/api/run.sh b/api/run.sh
index 38d8118..b2aeb80 100755
--- a/api/run.sh
+++ b/api/run.sh
@@ -1,7 +1,34 @@
 #!/bin/bash
 
-make build
+if [ -z "$MODEL" ]
+then
+    echo "Please set the MODEL environment variable"
+    exit 1
+fi
+
+if [ -z "$MODEL_DOWNLOAD_URL" ]
+then
+    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
+    exit 1
+fi
+
+# Check if curl is installed
+if ! [ -x "$(command -v curl)" ]; then
+    echo "curl is not installed. Installing..."
+    apt-get update --yes --quiet
+    apt-get install --yes --quiet curl
+fi
+
+# Download the model file if it is not already present
+if [ ! -f "$MODEL" ]; then
+    echo "Model file not found. Downloading..."
+    curl -L -o "$MODEL" "$MODEL_DOWNLOAD_URL"
+else
+    echo "$MODEL model found."
+fi
+
+make build
 
 # Get the number of available threads on the system
 n_threads=$(grep -c ^processor /proc/cpuinfo)
 
diff --git a/docker-compose-13b.yml b/docker-compose-13b.yml
index 20e1f7b..108d361 100644
--- a/docker-compose-13b.yml
+++ b/docker-compose-13b.yml
@@ -2,16 +2,18 @@ version: '3.6'
 
 services:
   llama-gpt-api-13b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-13b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 13B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-13b-chat.bin'
       USE_MLOCK: 1
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin'
     cap_add:
       - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
diff --git a/docker-compose-70b.yml b/docker-compose-70b.yml
index 91fa5ab..2643220 100644
--- a/docker-compose-70b.yml
+++ b/docker-compose-70b.yml
@@ -2,13 +2,14 @@ version: '3.6'
 
 services:
   llama-gpt-api-70b:
-    # image: 'ghcr.io/getumbrel/llama-gpt-api-llama-2-70b-chat:latest'
-    build:
-      context: ./api
-      dockerfile: 70B.Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-70b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin'
       # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. Currently,
       # it's not possible to change this using --n_gqa with llama-cpp-python in
       # run.sh, so we expose it as an environment variable.
@@ -18,6 +19,7 @@ services:
       USE_MLOCK: 1
     cap_add:
       - IPC_LOCK
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
diff --git a/docker-compose.yml b/docker-compose.yml
index b62be47..81feccb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,13 +2,15 @@ version: '3.6'
 
 services:
   llama-gpt-api-7b:
-    # image: ghcr.io/getumbrel/llama-gpt-api
-    build:
-      context: ./api
-      dockerfile: Dockerfile
+    image: ghcr.io/abetlen/llama-cpp-python:latest
     restart: on-failure
+    volumes:
+      - './models:/models'
+      - './api:/api'
     environment:
       MODEL: '/models/llama-2-7b-chat.bin'
+      MODEL_DOWNLOAD_URL: 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'
+    command: '/bin/sh /api/run.sh'
 
   llama-gpt-ui:
     image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
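
A minimal smoke test of the download-on-start flow introduced by this diff, sketched under the assumption of a stock Docker Compose setup; it is not part of the change itself. The compose file name, service name, ./models mount, and log message come from the diff; everything else is ordinary Compose usage.

# Bring up the 13B stack; on first start run.sh fetches the model into the
# host-mounted ./models directory, so later starts reuse the cached file.
docker compose -f docker-compose-13b.yml up -d

# Expect "Model file not found. Downloading..." on the first run.
docker compose -f docker-compose-13b.yml logs -f llama-gpt-api-13b

# The .bin file should appear here once the download finishes.
ls -lh ./models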