# ---------- Stage 1: build llama-server (CUDA, tuned to your CPU) ----------
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS builder

ENV DEBIAN_FRONTEND=noninteractive

# Basic build tools + curl dev headers (for the HTTP server)
RUN apt-get update && apt-get install -y --no-install-recommends \
    git build-essential cmake ninja-build ca-certificates \
    libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /src

# Fetch llama.cpp (HEAD, including the new tool-calling / server support)
RUN git clone --depth 1 https://github.com/ggml-org/llama.cpp.git .
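# If you want reproducible builds, you can pin a release tag instead of HEAD
# (a sketch; "<tag>" is a placeholder, pick one from the llama.cpp releases page):
#   RUN git clone https://github.com/ggml-org/llama.cpp.git . && \
#       git checkout <tag>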
# Important: make sure the linker sees a libcuda.so.1 (stub)
# The stub lives at /usr/local/cuda/lib64/stubs/libcuda.so
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so \
    /usr/lib/x86_64-linux-gnu/libcuda.so.1

# CMake configure:
# - server enabled
# - CUDA backend enabled
# - native CPU optimization (built on your T5600, so no AVX2)
# - CUDA architecture 61 (Pascal, P5000)
RUN cmake -S . -B build \
    -G Ninja \
    -DCMAKE_BUILD_TYPE=Release \
    -DLLAMA_BUILD_TESTS=OFF \
    -DLLAMA_BUILD_EXAMPLES=OFF \
    -DLLAMA_BUILD_SERVER=ON \
    -DGGML_CUDA=ON \
    -DGGML_NATIVE=ON \
    -DCMAKE_CUDA_ARCHITECTURES=61

# Build only the server target
RUN cmake --build build --config Release --target llama-server
# ---------- Stage 2: runtime image ----------
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 libcurl4-openssl-dev \
    ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Copy the binary + shared libs from the builder
COPY --from=builder /src/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /src/build/bin/libggml* /usr/local/lib/
COPY --from=builder /src/build/bin/libllama* /usr/local/lib/
COPY --from=builder /src/build/bin/libmtmd* /usr/local/lib/
ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}

EXPOSE 8080
VOLUME ["/models"]

ENTRYPOINT ["llama-server"]
# You override CMD yourself at docker run; this is just a default
CMD ["--host", "0.0.0.0", "--port", "8080"]