# syntax=docker/dockerfile:1
# Base: slim Debian-based Python 3.11 image (no build toolchain preinstalled;
# build-essential is added explicitly further down).
FROM python:3.11-slim

# All subsequent relative paths (COPY destinations, module lookup for the
# uvicorn CMD) resolve against /app.
WORKDIR /app
# ===== Model caches at fixed paths (baked into the image) =====
# Hugging Face caches (embeddings) + XDG cache (used by e.g. whisper).
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favor of HF_HOME — kept for backward compatibility; confirm
# before removing. STT_MODEL=small is only a fallback default; it is
# overwritten later by the STT_MODEL_ARG build argument.
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf \
    SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache \
    STT_MODEL=small
# Optional build args to pin the model choices.
#
# FIX: Dockerfile comments are only recognized at the *start* of a line; a
# trailing "# ..." on an ARG line becomes part of the default value. The
# original inline comments therefore corrupted the defaults (RAG_EMBEDDINGS
# defaulted to "gte-multilingual    # of: bge-small / ..."), so the prefetch
# step below never matched the mapping and silently fell back to bge-small.
# Comments now live on their own lines.

# Choices: gte-multilingual | bge-small | e5-small | gte-base-en
ARG RAG_EMBEDDINGS=gte-multilingual
# Choices: tiny | base | small | medium | large-v3, etc.
ARG STT_MODEL_ARG=small

# Persist the build-time choices into the runtime environment so app.py and
# the prefetch steps below see the same values.
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS}
ENV STT_MODEL=${STT_MODEL_ARG}
# Create the cache directories up front so the prefetch steps below (and
# any non-root process at runtime) can read them.
RUN set -eux; \
    mkdir -p /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper; \
    chmod -R a+rX /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper
# Copy the dependency manifest first so the pip layers below stay cached
# while application source changes.
COPY requirements.txt .

# OS-level build/runtime dependencies: ffmpeg + libav* for audio handling,
# cairo/pango/gdk-pixbuf for SVG/PDF rendering, build-essential/pkg-config
# for wheels that compile from source.
# FIX: added --no-install-recommends and apt-list cleanup in the same layer
# (removal in a later layer would not shrink the image); packages sorted.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libavcodec-dev \
        libavdevice-dev \
        libavfilter-dev \
        libavformat-dev \
        libavutil-dev \
        libcairo2 \
        libgdk-pixbuf2.0-0 \
        libpango-1.0-0 \
        libswresample-dev \
        libswscale-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt
# Extra runtime deps not listed in requirements.txt.
# FIX: --no-cache-dir was missing here, baking the pip download cache into
# the layer (the commented-out `pip cache purge` in a later layer would not
# have reclaimed the space anyway).
# NOTE(review): only faster-whisper is pinned; consider pinning the rest (or
# moving them into requirements.txt) for reproducible builds.
RUN pip install --no-cache-dir PyPDF2 python-multipart gitpython chromadb httpx meilisearch pandas openpyxl python-pptx faster-whisper==1.0.0 cairosvg sentence-transformers rank-bm25
# Piper TTS binary + the minimal shared libraries it needs.
# FIX: `set -eux` moved to the front of the chain so *every* command is
# traced and aborts on failure — the original only enabled it after
# apt-get/mkdir had already run unchecked.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        libatomic1 \
        libstdc++6 \
        wget; \
    rm -rf /var/lib/apt/lists/*; \
    mkdir -p /opt/piper; \
    URL="https://github.com/rhasspy/piper/releases/download/2023.11.14-2/piper_linux_x86_64.tar.gz"; \
    wget -O /tmp/piper.tgz "$URL"; \
    tar -xzf /tmp/piper.tgz -C /opt/piper --strip-components=1; \
    ln -sf /opt/piper/piper /usr/local/bin/piper; \
    rm -f /tmp/piper.tgz
# NOTE(review): the release tarball is fetched unverified; add a pinned
# sha256 check (sha256sum -c) once the expected digest is recorded.
# ===== Prefetch models during the build =====
# 1) SentenceTransformers (embeddings) — mirrors the alias mapping in app.py.
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer

# Short alias -> Hugging Face model id (keep in sync with app.py).
MODEL_IDS = {
    "gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
    "bge-small": "BAAI/bge-small-en-v1.5",
    "e5-small": "intfloat/e5-small-v2",
    "gte-base-en": "thenlper/gte-base",
}

alias = os.environ.get("RAG_EMBEDDINGS", "gte-multilingual").lower()
model_id = MODEL_IDS.get(alias, "BAAI/bge-small-en-v1.5")
# cache_folder honors SENTENCE_TRANSFORMERS_HOME/HF_HOME, but force it explicitly:
cache_dir = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
SentenceTransformer(model_id, cache_folder=cache_dir)
print("Prefetched SentenceTransformer:", model_id)
PY
# 2) faster-whisper (STT) — cached under /opt/cache/whisper.
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel

model_name = os.environ.get("STT_MODEL", "small")
download_dir = os.path.join(os.environ.get("XDG_CACHE_HOME", "/opt/cache"), "whisper")
os.makedirs(download_dir, exist_ok=True)
# Always CPU/INT8 at build time (no GPU is needed during the build).
WhisperModel(model_name, device="cpu", compute_type="int8", download_root=download_dir)
print("Prefetched faster-whisper:", model_name, "->", download_dir)
PY
# (optional) the piper voice could also be pre-cached here; skipped for now
# because the voice differs per environment.

# Application source goes last: these files change most often, so keeping
# them in a single late layer preserves the cache for everything above.
# FIX: six separate COPY instructions (six layers) collapsed into one.
COPY app.py \
     queue_helper.py \
     agent_repo.py \
     windowing_utils.py \
     smart_rag.py \
     llm_client.py \
     ./

# Documentation only — the port is actually published via `docker run -p`.
EXPOSE 8080

# NOTE(review): the container runs as root; consider adding a non-root USER
# once write permissions for /app and the /opt/* caches are confirmed.
# Exec-form CMD so uvicorn runs as PID 1 and receives SIGTERM from
# `docker stop` directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]