Skip to content

Commit

Permalink
Update with ROCM
Browse files Browse the repository at this point in the history
  • Loading branch information
korewaChino committed Sep 2, 2023
1 parent 785c378 commit f37b4f8
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 24 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ jobs:
uses: docker/build-push-action@v4
with:
push: true
tags: korewachino/koboldcpp:latest,korewachino/koboldcpp:1.41,korewachino/koboldcpp:cuda
tags: |
korewachino/koboldcpp:latest
korewachino/koboldcpp:1.42.1
korewachino/koboldcpp:cuda
docker-nocuda:
runs-on: ubuntu-latest
steps:
Expand All @@ -48,4 +51,7 @@ jobs:
with:
file: nocuda.dockerfile
push: true
tags: korewachino/koboldcpp:nocuda,korewachino/koboldcpp:1.41-nocuda
tags: |
korewachino/koboldcpp:nocuda
korewachino/koboldcpp:1.42.1-rocm
korewachino/koboldcpp:1.42.1-nocuda
25 changes: 20 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
FROM nvidia/cuda:12.2.0-devel-ubuntu20.04
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.2.0

FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl gnupg software-properties-common
RUN add-apt-repository -y ppa:cnugteren/clblast && apt-get update
Expand Down Expand Up @@ -27,16 +33,25 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install -r requirements.txt

RUN pip install psutil


RUN mkdir ./home/koboldcpp
COPY ./koboldcpp ./home/koboldcpp

WORKDIR /home/koboldcpp

RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt

# This environment variable selects the CUDA architectures to build for, so the binary works on all supported GPUs
ENV CUDA_DOCKER_ARCH=all
RUN make LLAMA_OPENBLAS=1 LLAMA_CUBLAS=1 LLAMA_CLBLAST=1 -j$(nproc)
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
ENV LLAMA_CUBLAS=1
ENV LLAMA_OPENBLAS=1
ENV LLAMA_CUBLAS=1
ENV LLAMA_CLBLAST=1
RUN make -j$(nproc)

WORKDIR /
COPY start_program.sh /home/koboldcpp
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ services:
working_dir: /home/koboldcpp
build:
context: .
dockerfile: Dockerfile
dockerfile: nocuda.dockerfile
# dockerfile: nocuda.dockerfile
ports:
- "5001:5001"
Expand Down
2 changes: 1 addition & 1 deletion koboldcpp
Submodule koboldcpp updated 89 files
+44 −0 .devops/full-rocm.Dockerfile
+35 −9 .devops/llama-cpp-clblast.srpm.spec
+25 −1 .devops/llama-cpp-cublas.srpm.spec
+36 −9 .devops/llama-cpp.srpm.spec
+44 −0 .devops/main-rocm.Dockerfile
+43 −0 .github/workflows/gguf-publish.yml
+2 −15 .gitignore
+41 −1 CMakeLists.txt
+70 −22 Makefile
+119 −22 ci/run.sh
+376 −13 common/common.cpp
+43 −1 common/common.h
+1 −0 common/log.h
+76 −93 convert-falcon-hf-to-gguf.py
+70 −103 convert-gptneox-hf-to-gguf.py
+75 −125 convert-llama-7b-pth-to-gguf.py
+14 −14 convert-llama-ggmlv3-to-gguf.py
+75 −126 convert-llama-hf-to-gguf.py
+3 −3 convert-lora-to-ggml.py
+287 −166 convert.py
+1 −0 examples/CMakeLists.txt
+8 −0 examples/beam-search/CMakeLists.txt
+188 −0 examples/beam-search/beam-search.cpp
+1 −1 examples/chat.sh
+2 −6 examples/convert-llama2c-to-ggml/README.md
+218 −123 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+1 −1 examples/embd-input/embd-input-lib.cpp
+1 −4 examples/embedding/embedding.cpp
+5 −0 examples/gguf/CMakeLists.txt
+3 −0 examples/gguf/gguf.cpp
+59 −12 examples/llama-bench/llama-bench.cpp
+2 −1 examples/llm.vim
+271 −112 examples/main/main.cpp
+198 −31 examples/perplexity/perplexity.cpp
+2 −2 examples/quantize/quantize.cpp
+2 −2 examples/save-load-state/save-load-state.cpp
+16 −13 examples/server/README.md
+1,675 −1,101 examples/server/index.html.hpp
+231 −14 examples/server/public/index.html
+160 −49 examples/server/server.cpp
+2 −2 examples/simple/simple.cpp
+7 −7 examples/train-text-from-scratch/README.md
+492 −0 examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+1,152 −2,250 examples/train-text-from-scratch/train-text-from-scratch.cpp
+12 −23 expose.cpp
+1 −0 expose.h
+87 −110 ggml-alloc.c
+195 −66 ggml-cuda.cu
+8 −0 ggml-cuda.h
+1 −0 ggml-metal.h
+133 −59 ggml-metal.m
+86 −7 ggml-metal.metal
+410 −425 ggml.c
+45 −21 ggml.h
+21 −0 gguf-py/LICENSE
+72 −0 gguf-py/README.md
+1 −0 gguf-py/gguf/__init__.py
+858 −0 gguf-py/gguf/gguf.py
+0 −0 gguf-py/gguf/py.typed
+29 −0 gguf-py/pyproject.toml
+7 −0 gguf-py/tests/test_gguf.py
+0 −723 gguf.py
+53 −21 gpttype_adapter.cpp
+2 −2 k_quants.c
+631 −64 klite.embd
+32 −19 koboldcpp.py
+475 −111 llama.cpp
+52 −6 llama.h
+12 −1 model_adapter.cpp
+1 −1 model_adapter.h
+55 −0 otherarch/ggml_v2-cuda-legacy.cu
+57 −1 otherarch/ggml_v2-cuda.cu
+10 −2 otherarch/gpt2_v3.cpp
+11 −3 otherarch/gptj_v3.cpp
+1 −1 otherarch/llama_v2.cpp
+10 −2 otherarch/mpt_v3.cpp
+11 −3 otherarch/neox_v3.cpp
+1 −0 requirements.txt
+140 −0 run_with_preset.py
+26 −0 scripts/convert-gg.sh
+30 −0 scripts/qnt-all.sh
+34 −0 scripts/run-all-perf.sh
+30 −0 scripts/run-all-ppl.sh
+3 −0 tests/test-c.c
+178 −0 tests/test-tokenizer-0-falcon.cpp
+83 −0 tests/test-tokenizer-0-falcon.py
+182 −0 tests/test-tokenizer-0-llama.cpp
+95 −0 tests/test-tokenizer-0-llama.py
+3 −11 tests/test-tokenizer-1.cpp
48 changes: 35 additions & 13 deletions nocuda.dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
FROM ubuntu:20.04
ARG UBUNTU_VERSION=22.04

# Initialize the image
# Modify to pre-install dev tools and ROCm packages
ARG ROCM_VERSION=5.3
ARG AMDGPU_VERSION=5.3
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates curl gnupg software-properties-common && \
curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
sh -c 'echo deb [arch=amd64] http://repo.radeon.com/rocm/apt/$ROCM_VERSION/ focal main > /etc/apt/sources.list.d/rocm.list' && \
sh -c 'echo deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list'
# Target the ROCm dev container image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
gfx803 \
gfx900 \
gfx906 \
gfx908 \
gfx90a \
gfx1010 \
gfx1030 \
gfx1100 \
gfx1101 \
gfx1102


FROM ${BASE_ROCM_DEV_CONTAINER}
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl gnupg software-properties-common
RUN add-apt-repository -y ppa:cnugteren/clblast -y && apt-get update
RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
RUN apt-get install -y --no-install-recommends \
sudo \
libelf1 \
libnuma-dev \
Expand All @@ -36,14 +52,20 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install -r requirements.txt
RUN pip install psutil
RUN mkdir ./home/koboldcpp
COPY ./koboldcpp ./home/koboldcpp

WORKDIR /home/koboldcpp
RUN make LLAMA_OPENBLAS=1 LLAMA_CLBLAST=1 -j$(nproc)
RUN pip install -r requirements.txt

ENV LLAMA_OPENBLAS=1
ENV LLAMA_CLBLAST=1
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc)

WORKDIR /
COPY start_program.sh /home/koboldcpp
Expand Down
2 changes: 0 additions & 2 deletions requirements.txt

This file was deleted.

0 comments on commit f37b4f8

Please sign in to comment.