# mistral-api/Dockerfile

FROM python:3.11-slim

WORKDIR /app

# ===== Model caches at fixed paths (so they stay inside the image) =====
# Hugging Face caches (embeddings) all point at the same directory.
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf
# sentence-transformers cache, generic XDG cache (used by whisper, among others)
# and the default speech-to-text model name.
ENV SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache \
    STT_MODEL=small

# Optional build args to pin the model choices.
# NOTE: Dockerfile comments must sit on their own line; a `#` that follows an
# instruction's arguments is handed to the instruction as an argument instead
# of being ignored, so the allowed values are documented above each ARG.
# RAG_EMBEDDINGS: gte-multilingual | bge-small | e5-small | gte-base-en
ARG RAG_EMBEDDINGS=gte-multilingual
# STT_MODEL_ARG: tiny | base | small | medium | large-v3, etc.
ARG STT_MODEL_ARG=small
# Persist the chosen values as environment variables so that later build steps
# and the running container can read them from the environment.
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS}
ENV STT_MODEL=${STT_MODEL_ARG}

# Create the cache directories up front and make them readable/traversable
# for every user.
RUN set -e; \
    for dir in /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper; do \
        mkdir -p "$dir"; \
        chmod -R a+rX "$dir"; \
    done

# OS packages: ffmpeg and the cairo/pango/gdk-pixbuf libraries, plus a build
# toolchain and the FFmpeg development headers (presumably for pip packages
# that compile native extensions - confirm against requirements.txt).
# The apt package lists are removed in the same layer so they do not end up in
# the image; a later `apt-get update` re-creates them when needed.
RUN apt-get update \
 && apt-get -y install \
      apt-utils \
      build-essential \
      curl \
      ffmpeg \
      git \
      libavcodec-dev \
      libavdevice-dev \
      libavfilter-dev \
      libavformat-dev \
      libavutil-dev \
      libcairo2 \
      libgdk-pixbuf2.0-0 \
      libpango-1.0-0 \
      libswresample-dev \
      libswscale-dev \
      pkg-config \
 && rm -rf /var/lib/apt/lists/*

# --no-cache-dir on every pip call keeps the download/wheel cache out of the
# image layers, which makes a separate `pip cache purge` step unnecessary.
RUN pip install --no-cache-dir --upgrade pip

# requirements.txt is copied only after the OS packages are installed, so a
# change to the Python requirements does not invalidate the apt layer above.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir PyPDF2 python-multipart gitpython chromadb httpx meilisearch pandas openpyxl python-pptx faster-whisper==1.0.0 cairosvg sentence-transformers rank-bm25

# Install the Piper TTS binary from the pinned upstream release archive.
# `set -eux` must come first: every command below is separated by `;`, so
# without -e active from the start a failing apt-get/wget/tar would be ignored
# and the layer would still succeed because the final `rm -f` exits 0.
# NOTE(review): the download is x86_64-only and is not checksum-verified;
# consider adding a `sha256sum -c` check once the expected digest is recorded.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
      wget ca-certificates libstdc++6 libatomic1; \
    rm -rf /var/lib/apt/lists/*; \
    mkdir -p /opt/piper; \
    URL="https://github.com/rhasspy/piper/releases/download/2023.11.14-2/piper_linux_x86_64.tar.gz"; \
    wget -O /tmp/piper.tgz "$URL"; \
    tar -xzf /tmp/piper.tgz -C /opt/piper --strip-components=1; \
    ln -sf /opt/piper/piper /usr/local/bin/piper; \
    rm -f /tmp/piper.tgz

# ===== Prefetch models during the build =====
# 1) SentenceTransformers (embeddings). The key -> Hugging Face id table is
#    meant to mirror the mapping in app.py (not visible here - keep in sync).
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer

# Model id used when RAG_EMBEDDINGS holds a key that is not in the table.
FALLBACK_ID = "BAAI/bge-small-en-v1.5"
MODEL_IDS = {
    "gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
    "bge-small": "BAAI/bge-small-en-v1.5",
    "e5-small": "intfloat/e5-small-v2",
    "gte-base-en": "thenlper/gte-base",
}

requested = os.environ.get("RAG_EMBEDDINGS", "gte-multilingual").lower()
model_id = MODEL_IDS[requested] if requested in MODEL_IDS else FALLBACK_ID

# Pass the cache folder explicitly instead of relying on the library picking
# up SENTENCE_TRANSFORMERS_HOME / HF_HOME by itself.
target_dir = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
SentenceTransformer(model_id, cache_folder=target_dir)
print("Prefetched SentenceTransformer:", model_id)
PY

# 2) faster-whisper (STT) - downloaded into $XDG_CACHE_HOME/whisper
#    (/opt/cache/whisper with the defaults).
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel

model_name = os.environ.get("STT_MODEL", "small")
xdg_cache = os.environ.get("XDG_CACHE_HOME", "/opt/cache")
whisper_dir = os.path.join(xdg_cache, "whisper")
os.makedirs(whisper_dir, exist_ok=True)

# Always load on CPU with int8 at build time, so the build needs no GPU.
WhisperModel(model_name, device="cpu", compute_type="int8", download_root=whisper_dir)
print("Prefetched faster-whisper:", model_name, "->", whisper_dir)
PY

# (Optional) A Piper voice could also be pre-cached here; omitted for now because the voice differs per environment.


# Application modules, copied into the working directory after all dependency
# and model-prefetch steps so that code changes do not invalidate those layers.
COPY app.py queue_helper.py agent_repo.py windowing_utils.py smart_rag.py llm_client.py ./

# Port the server listens on (EXPOSE is documentation only; publish at run time).
EXPOSE 8080

# Serve the `app` object from app.py with uvicorn on all interfaces.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]