gguf-my-repo-ENHANCED

Running

App Files Community

Oleg Shulyakov committed on Aug 5

Commit

99ea333

1 Parent(s): 629e9b9

Use llama.cpp image

Browse files

Files changed (4) hide show

.dockerignore +15 -3
Dockerfile +11 -53
requirements.txt +5 -0
start.sh +2 -14

.dockerignore CHANGED Viewed

@@ -1,3 +1,15 @@
-/downloads
-/llama.cpp
-/outputs

+# IDE
+.idea/
+.vscode/
+.git*
+.dockerignore
+docker-compose.yml
+Dockerfile
+# LLama.cpp
+llama.cpp/
+# Working files
+downloads/
+outputs/

Dockerfile CHANGED Viewed

@@ -1,65 +1,23 @@
-FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends --fix-missing \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    cmake \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    ffmpeg \
-    nvidia-driver-570
 # Check if user with UID 1000 exists, if not create it
 RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
 USER 1000
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:${PATH}
-WORKDIR ${HOME}/app
-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.11
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel && \
-    pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search" "APScheduler"
-COPY --chown=1000 . ${HOME}/app
-RUN git clone https://github.com/ggerganov/llama.cpp
-RUN pip install -r llama.cpp/requirements/requirements-convert_hf_to_gguf.txt
-COPY train_data.txt ${HOME}/app/llama.cpp/
-ENV PYTHONPATH=${HOME}/app \
-    PYTHONUNBUFFERED=1 \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     GRADIO_ALLOW_FLAGGING=never \
     GRADIO_NUM_PORTS=1 \
     GRADIO_SERVER_NAME=0.0.0.0 \
     GRADIO_THEME=huggingface \
-    TQDM_POSITION=-1 \
-    TQDM_MININTERVAL=1 \
-    SYSTEM=spaces \
-    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
-    PATH=/usr/local/nvidia/bin:${PATH}
-ENTRYPOINT /bin/bash start.sh

+FROM ghcr.io/ggml-org/llama.cpp:full
 # Check if user with UID 1000 exists, if not create it
 RUN id -u 1000 &>/dev/null || useradd -m -u 1000 user
 USER 1000
+ENV HOME=/home/user \
+    PATH=${PATH}:/home/user/.local/bin \
+    PATH=${PATH}:/app \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     GRADIO_ALLOW_FLAGGING=never \
     GRADIO_NUM_PORTS=1 \
     GRADIO_SERVER_NAME=0.0.0.0 \
     GRADIO_THEME=huggingface \
+    SYSTEM=spaces
+WORKDIR ${HOME}/app
+COPY --chown=1000 . ${HOME}/app
+RUN pip install -r requirements.txt
+ENTRYPOINT ["/bin/bash", "start.sh"]

requirements.txt ADDED Viewed

@@ -0,0 +1,5 @@

+huggingface-hub
+hf-transfer
+gradio[oauth]==4.27.0
+gradio_huggingfacehub_search
+APScheduler

start.sh CHANGED Viewed

@@ -1,21 +1,9 @@
 #!/bin/bash
-if [ ! -d "llama.cpp" ]; then
-  # only run in dev env
-  git clone https://github.com/ggerganov/llama.cpp
-fi
 export GGML_CUDA=OFF
 if [[ -z "${RUN_CUDA}" ]]; then
-  # enable CUDA if NOT running locally
   export GGML_CUDA=ON
 fi
-cd llama.cpp
-cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} -DLLAMA_CURL=OFF
-cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
-cp ./build/bin/llama-* .
-rm -rf build
-cd ..
-python app.py

 #!/bin/bash
 export GGML_CUDA=OFF
+# enable CUDA
 if [[ -z "${RUN_CUDA}" ]]; then
   export GGML_CUDA=ON
 fi
+python3 app.py