mistral-api/Dockerfile
2025-11-27 08:54:07 +01:00

86 lines
3.4 KiB
Docker

# Base image and working directory for the service.
FROM python:3.11-slim
WORKDIR /app

# ===== Model caches at fixed paths (they stay inside the image) =====
# Hugging Face caches (embeddings): all three variables point at the same directory.
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf
# sentence-transformers gets its own directory; the XDG cache is used by whisper, among others.
ENV SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache
# Initial speech-to-text model name.
ENV STT_MODEL=small
# Optional build args to pin the model choices.
# A Dockerfile only treats `#` as a comment at the start of a line; written after
# an instruction it becomes part of that instruction's arguments. The allowed
# values are therefore listed on their own comment lines.
# RAG_EMBEDDINGS: gte-multilingual | bge-small | e5-small | gte-base-en
ARG RAG_EMBEDDINGS=gte-multilingual
# STT_MODEL_ARG: tiny | base | small | medium | large-v3, etc.
ARG STT_MODEL_ARG=small
# Promote the build args to environment variables so later build steps and the
# running container can read them.
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS} \
    STT_MODEL=${STT_MODEL_ARG}
# Create the cache directories up front and make them readable (and traversable) by everyone.
RUN set -e; \
    cache_dirs="/opt/hf /opt/cache /opt/sentence-transformers /opt/whisper"; \
    mkdir -p $cache_dirs; \
    chmod -R a+rX $cache_dirs
# OS packages: runtime tools (git, curl, ffmpeg, cairo/pango/gdk-pixbuf) plus the
# compiler toolchain and libav* headers.
# update + install + list cleanup happen in ONE layer so the apt index is never
# stored in the image. The package set is unchanged; --no-install-recommends is
# intentionally not added here because it could drop packages the app relies on.
RUN apt-get update && apt-get -y install \
        apt-utils \
        build-essential \
        curl \
        ffmpeg \
        git \
        libavcodec-dev \
        libavdevice-dev \
        libavfilter-dev \
        libavformat-dev \
        libavutil-dev \
        libcairo2 \
        libgdk-pixbuf2.0-0 \
        libpango-1.0-0 \
        libswresample-dev \
        libswscale-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies. requirements.txt is copied only now, so editing it does not
# invalidate the (slow) apt layer above.
# --no-cache-dir on every pip call keeps pip's download cache out of the layers;
# purging the cache in a later RUN would not shrink the image.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt
# Extra packages installed on top of requirements.txt (kept as a separate step so
# dependency resolution order stays the same as before).
RUN pip install --no-cache-dir \
        cairosvg \
        chromadb \
        faster-whisper==1.0.0 \
        gitpython \
        httpx \
        meilisearch \
        openpyxl \
        pandas \
        PyPDF2 \
        python-multipart \
        python-pptx \
        rank-bm25 \
        sentence-transformers
# Install the Piper binary release into /opt/piper and expose it as `piper` on PATH.
# `set -eux` must be the FIRST command: every following step is separated by `;`,
# so without it a failed apt-get or download would be ignored and the layer would
# still "succeed" because the final `rm -f` returns 0.
# NOTE(review): the download is not checksum-verified; consider pinning a sha256.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        libatomic1 \
        libstdc++6 \
        wget; \
    rm -rf /var/lib/apt/lists/*; \
    # Pick the release asset that matches the image architecture instead of
    # always fetching x86_64 (which cannot run on other platforms).
    deb_arch="$(dpkg --print-architecture)"; \
    case "$deb_arch" in \
        amd64) piper_arch="x86_64" ;; \
        arm64) piper_arch="aarch64" ;; \
        armhf) piper_arch="armv7l" ;; \
        *) echo "No piper release asset known for architecture: $deb_arch" >&2; exit 1 ;; \
    esac; \
    URL="https://github.com/rhasspy/piper/releases/download/2023.11.14-2/piper_linux_${piper_arch}.tar.gz"; \
    mkdir -p /opt/piper; \
    wget -O /tmp/piper.tgz "$URL"; \
    # The archive has a single top-level directory; strip it so files land directly in /opt/piper.
    tar -xzf /tmp/piper.tgz -C /opt/piper --strip-components=1; \
    ln -sf /opt/piper/piper /usr/local/bin/piper; \
    rm -f /tmp/piper.tgz
# ===== Prefetch models during the build =====
# 1) SentenceTransformers (embeddings) — follows the mapping in app.py
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer

# Short alias (value of RAG_EMBEDDINGS) -> Hugging Face model id.
MODEL_IDS = {
    "gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
    "bge-small": "BAAI/bge-small-en-v1.5",
    "e5-small": "intfloat/e5-small-v2",
    "gte-base-en": "thenlper/gte-base",
}
# Used when RAG_EMBEDDINGS holds an alias that is not in the table above.
FALLBACK_ID = "BAAI/bge-small-en-v1.5"

alias = os.environ.get("RAG_EMBEDDINGS", "gte-multilingual").lower()
hf_id = MODEL_IDS.get(alias, FALLBACK_ID)

# The library would honour SENTENCE_TRANSFORMERS_HOME/HF_HOME by itself, but the
# cache folder is passed explicitly to force the location.
# NOTE(review): the model card for Alibaba-NLP/gte-multilingual-base loads it with
# trust_remote_code=True — confirm this prefetch succeeds without that flag.
target_dir = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
SentenceTransformer(hf_id, cache_folder=target_dir)
print("Prefetched SentenceTransformer:", hf_id)
PY
# 2) faster-whisper (STT) — cached under $XDG_CACHE_HOME/whisper (/opt/cache/whisper)
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel

model_name = os.environ.get("STT_MODEL", "small")
xdg_cache = os.environ.get("XDG_CACHE_HOME", "/opt/cache")
whisper_dir = os.path.join(xdg_cache, "whisper")
os.makedirs(whisper_dir, exist_ok=True)

# At build time always load on CPU with INT8 (no GPU is needed during the build);
# constructing the model is what triggers the download into whisper_dir.
WhisperModel(model_name, device="cpu", compute_type="int8", download_root=whisper_dir)
print("Prefetched faster-whisper:", model_name, "->", whisper_dir)
PY
# (Optional) a piper voice could also be pre-cached here; it is left out because
# the voice differs per environment.

# Application modules, copied last so source edits do not invalidate the
# dependency and model layers above.
COPY app.py \
     queue_helper.py \
     agent_repo.py \
     windowing_utils.py \
     smart_rag.py \
     llm_client.py \
     ./

# NOTE(review): no USER instruction is set, so the server runs as root — confirm
# whether a non-root user is feasible given the paths the app writes to.
# Port the uvicorn server listens on (documentation only; publish it at run time).
EXPOSE 8080
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]