# syntax=docker/dockerfile:1
# mistral-llm/Dockerfile
# Last updated: 2026-02-23 15:01:45 +00:00
# ---------- Stage 1: build llama-server (CUDA, tuned for the build host CPU) ----------
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS builder

# Build-time only — an ARG (unlike ENV) is not persisted into the image
# environment, which is the recommended way to set DEBIAN_FRONTEND.
ARG DEBIAN_FRONTEND=noninteractive

# Base build tools + libcurl headers (needed for the HTTP server).
# Packages sorted alphabetically, one per line, cache cleaned in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libcurl4-openssl-dev \
        ninja-build \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /src

# Fetch llama.cpp (includes the recent tool-calling / server work).
# Pin a tag or branch via --build-arg LLAMA_CPP_REF=<ref> for reproducible
# builds; the default tracks the repository's default branch (master),
# matching the previous unpinned-HEAD behavior.
ARG LLAMA_CPP_REF=master
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggml-org/llama.cpp.git .

# Important: make sure the linker sees a libcuda.so.1 at build time. The real
# driver library only exists at runtime (injected by the NVIDIA container
# runtime), so we link against the stub shipped with the CUDA toolkit at
# /usr/local/cuda/lib64/stubs/libcuda.so.
RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so \
        /usr/lib/x86_64-linux-gnu/libcuda.so.1
# CMake configure:
#   - HTTP server target on, tests/examples off
#   - CUDA backend on
#   - native CPU optimisation (build-host CPU — a T5600, so no AVX2)
#   - CUDA architecture 61 (Pascal, Quadro P5000)
RUN cmake -S . -B build -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_CUDA_ARCHITECTURES=61 \
        -DGGML_CUDA=ON \
        -DGGML_NATIVE=ON \
        -DLLAMA_BUILD_EXAMPLES=OFF \
        -DLLAMA_BUILD_SERVER=ON \
        -DLLAMA_BUILD_TESTS=OFF

# Build only the server binary (Ninja parallelises by default).
RUN cmake --build build --config Release --target llama-server
# ---------- Stage 2: runtime image ----------
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04

# Build-time only; not persisted into the runtime environment (unlike ENV).
ARG DEBIAN_FRONTEND=noninteractive

# Runtime dependencies only:
#   - libcurl4 instead of libcurl4-openssl-dev: the server links the shared
#     library, the dev headers are build-time only and just bloat the image
#   - curl is used by the HEALTHCHECK below
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libcurl4 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Copy the server binary and every shared library produced by the builder.
# A single lib* glob replaces the previous overlapping patterns
# (libg* already matched everything libggml* did).
COPY --from=builder /src/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /src/build/bin/lib* /usr/local/lib/

# Append to the base image's LD_LIBRARY_PATH — the nvidia/cuda images set it
# (e.g. /usr/local/nvidia/lib64), so it must not be overwritten.
ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}

# Run as a dedicated non-root user; stable numeric UID/GID so orchestrators
# (e.g. Kubernetes runAsNonRoot) can verify it. Mounted /models content must
# be readable by UID 10001.
RUN groupadd --system --gid 10001 llama \
    && useradd --system --uid 10001 --gid llama llama
USER llama

EXPOSE 8080
VOLUME ["/models"]

# llama-server exposes /health (200 once the model is loaded); generous
# start-period because large models take a while to load.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:8080/health || exit 1

ENTRYPOINT ["llama-server"]
# Override CMD yourself at docker run; this is only a default.
CMD ["--host", "0.0.0.0", "--port", "8080"]