# mistral-api/Dockerfile
# ===== Base: CUDA 11.8 + cuDNN + conda =====
# Pinned official PyTorch runtime image (PyTorch 2.3.1, CUDA 11.8, cuDNN 8).
FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime
WORKDIR /app
# Make sure conda libs are always found first.
# NOTE(review): if the base image leaves LD_LIBRARY_PATH unset, this expands
# with a trailing ":" (harmless, but confirm against the base image).
ENV LD_LIBRARY_PATH=/opt/conda/lib:${LD_LIBRARY_PATH}
# P5000 = Pascal SM 6.1; useful for (possible) on-the-fly CUDA kernel builds.
ENV TORCH_CUDA_ARCH_LIST="6.1"
# ===== Model caches at fixed paths =====
# All HF / transformers / sentence-transformers downloads land under /opt so
# the prefetch layers below are baked into the image and found at runtime.
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf \
    SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache
# Build-time knobs (override with --build-arg); exported as ENV so both the
# prefetch scripts below and the runtime app see the same values.
# STT_MODEL defaults to "small" via STT_MODEL_ARG — the previous extra
# `ENV STT_MODEL=small` was dead (immediately overridden) and is removed.
ARG RAG_EMBEDDINGS=gte-multilingual
ARG STT_MODEL_ARG=small
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS}
ENV STT_MODEL=${STT_MODEL_ARG}
# Cache directories, world-readable so a future non-root user could use them.
RUN mkdir -p /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper && \
    chmod -R a+rX /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper
# ===== System packages (single apt layer; duplicates removed) =====
# Previously two separate apt layers duplicated git/curl/build-essential and
# listed pkg-config twice; merged here into one sorted, deduplicated install.
# - libav*/libsw* dev headers + build-essential/pkg-config let pip build PyAV
#   from source against FFmpeg
# - libcairo2/libpango/libgdk-pixbuf back the cairosvg stack
# DEBIAN_FRONTEND is set inline so it does not leak into the runtime env.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libavcodec-dev \
        libavdevice-dev \
        libavfilter-dev \
        libavformat-dev \
        libavutil-dev \
        libcairo2 \
        libgdk-pixbuf2.0-0 \
        libpango-1.0-0 \
        libswresample-dev \
        libswscale-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*
# FFmpeg via conda-forge so recent headers/libs are available: the later pip
# step's faster-whisper==1.0.0 pulls av==11.*, which can build against
# conda's FFmpeg 6.
RUN conda config --system --set channel_priority flexible \
    && conda install -y -c conda-forge "ffmpeg>=6,<8" \
    && conda clean -afy
# ===== Python deps =====
# requirements.txt is copied alone first so this layer stays cached until the
# requirements themselves change (app code changes do not bust it).
COPY requirements.txt .
# --no-cache-dir added: without it the pip upgrade left its wheel cache in the
# layer (hadolint DL3042).
RUN pip install --no-cache-dir --upgrade pip
# Project requirements.
RUN pip install --no-cache-dir -r requirements.txt
# Loose extras (note: av is satisfied via conda's FFmpeg build, not pip).
# NOTE(review): only faster-whisper is pinned — consider pinning the rest for
# reproducible builds.
RUN pip install --no-cache-dir \
    PyPDF2 python-multipart gitpython chromadb httpx meilisearch \
    pandas openpyxl python-pptx faster-whisper==1.0.0 \
    cairosvg sentence-transformers rank-bm25
# ===== Prefetch models (baked into the image; no network needed at startup) =====
# 1) SentenceTransformers embedder, selected via the RAG_EMBEDDINGS build arg.
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer
# Friendly build-arg name -> Hugging Face model id.
mapping = {
"gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
"bge-small": "BAAI/bge-small-en-v1.5",
"e5-small": "intfloat/e5-small-v2",
"gte-base-en": "thenlper/gte-base",
}
# Unknown choices silently fall back to bge-small-en-v1.5.
choice = os.environ.get("RAG_EMBEDDINGS","gte-multilingual").lower()
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
cache_root = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
# Saved to a fixed path so the app can load it without knowing the HF id.
local_dir = os.path.join(cache_root, "embedder")
os.makedirs(cache_root, exist_ok=True)
print("Downloading SentenceTransformer:", hf_id)
model = SentenceTransformer(hf_id, cache_folder=cache_root, device="cpu") # download only; CPU avoids needing a GPU at build time
model.save(local_dir)
print("Prefetched SentenceTransformer:", hf_id)
PY
# 2) faster-whisper (prefetched with CPU/int8; the runtime can pick its own device)
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel
# Model size comes from the STT_MODEL build arg / env (default "small").
name = os.environ.get("STT_MODEL","small")
cache_root = os.path.join(os.environ.get("XDG_CACHE_HOME","/opt/cache"), "whisper")
os.makedirs(cache_root, exist_ok=True)
# Instantiating the model triggers the download into download_root.
_ = WhisperModel(name, device="cpu", compute_type="int8", download_root=cache_root)
print("Prefetched faster-whisper:", name, "->", cache_root)
PY
# (optional) piper is skipped here; can be added later
# ===== App code =====
# Copied last so code changes never invalidate the dependency/model layers.
# Collapsed from seven separate COPY instructions into one layer.
COPY app.py \
     queue_helper.py \
     agent_repo.py \
     windowing_utils.py \
     smart_rag.py \
     llm_client.py \
     web_search.py \
     ./
# NOTE(review): the container still runs as root — consider adding a non-root
# USER once the app's write paths (/opt caches, /app) are confirmed.
EXPOSE 8080
# Exec-form CMD: uvicorn runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]