# syntax=docker/dockerfile:1
# ===== Base with CUDA 11.8 + cuDNN + conda =====
FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime

WORKDIR /app
# Make sure conda's libraries are always found first
ENV LD_LIBRARY_PATH=/opt/conda/lib:${LD_LIBRARY_PATH}

# P5000 = Pascal SM 6.1; useful for (possible) on-the-fly CUDA builds
ENV TORCH_CUDA_ARCH_LIST="6.1"
# ===== Model caches on fixed paths =====
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases but kept for backward compatibility; HF_HOME covers the modern
# lookup path.
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf \
    SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache

# Build-time knobs with sane defaults, exported as ENV so the prefetch
# steps below and the runtime app see the same values.
# (The redundant STT_MODEL=small in the cache ENV block above was removed:
# it was immediately overwritten by STT_MODEL=${STT_MODEL_ARG} — the final
# default value, "small", is unchanged.)
ARG RAG_EMBEDDINGS=gte-multilingual
ARG STT_MODEL_ARG=small
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS} \
    STT_MODEL=${STT_MODEL_ARG}
# Cache/model directories, world-readable for any runtime UID
RUN mkdir -p /opt/cache /opt/hf /opt/sentence-transformers /opt/whisper \
 && chmod -R a+rX /opt/cache /opt/hf /opt/sentence-transformers /opt/whisper
# ===== Minimal apt utilities only (NO multimedia libs here) =====
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      curl \
      git \
 && rm -rf /var/lib/apt/lists/*
# ===== Multimedia system packages =====
# Runtime libs for the cairosvg stack (cairo/pango/gdk-pixbuf) plus FFmpeg
# dev headers so PyAV (pulled in by faster-whisper) can build from source
# when no binary wheel matches.
# NOTE(review): the original line listed pkg-config twice and re-installed
# git/curl/build-essential, which the layer above already provides —
# duplicates removed; the installed package set is otherwise unchanged.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      apt-utils \
      ffmpeg \
      libavcodec-dev \
      libavdevice-dev \
      libavfilter-dev \
      libavformat-dev \
      libavutil-dev \
      libcairo2 \
      libgdk-pixbuf2.0-0 \
      libpango-1.0-0 \
      libswresample-dev \
      libswscale-dev \
      pkg-config \
 && rm -rf /var/lib/apt/lists/*
# FFmpeg via conda-forge for recent headers/libs. Because /opt/conda/lib is
# first on LD_LIBRARY_PATH, these libraries shadow the apt-installed FFmpeg.
RUN conda config --system --set channel_priority flexible \
 && conda install -y -c conda-forge "ffmpeg>=6,<8" \
 && conda clean -afy
# Note for the pip step below: faster-whisper==1.0.0 pulls in av==11.*,
# which can now build against conda's FFmpeg 6 headers installed above.
# ===== Python deps =====
COPY requirements.txt .
# --no-cache-dir on every pip layer (DL3042): the wheel cache would only
# bloat the image.
RUN pip install --no-cache-dir --upgrade pip
# project requirements
RUN pip install --no-cache-dir -r requirements.txt
# Loose extras, sorted for diffability. faster-whisper is pinned because it
# decides which PyAV (av) version pip builds against conda's FFmpeg.
# NOTE(review): av itself arrives via pip as a faster-whisper dependency,
# not via conda — the old comment claimed otherwise.
RUN pip install --no-cache-dir \
      PyPDF2 \
      cairosvg \
      chromadb \
      faster-whisper==1.0.0 \
      gitpython \
      httpx \
      meilisearch \
      openpyxl \
      pandas \
      pymupdf \
      python-multipart \
      python-pptx \
      rank-bm25 \
      sentence-transformers
# ===== Prefetch models =====

# 1) SentenceTransformers: download the chosen embedder at build time and
#    save a copy under a stable path so the runtime needs no network access.
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer

# Short names accepted via RAG_EMBEDDINGS -> Hugging Face model ids
mapping = {
    "gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
    "bge-small": "BAAI/bge-small-en-v1.5",
    "e5-small": "intfloat/e5-small-v2",
    "gte-base-en": "thenlper/gte-base",
}
choice = os.environ.get("RAG_EMBEDDINGS", "gte-multilingual").lower()
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")  # fallback for unknown names
cache_root = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
local_dir = os.path.join(cache_root, "embedder")
os.makedirs(cache_root, exist_ok=True)
print("Downloading SentenceTransformer:", hf_id)
# trust_remote_code=True is required for Alibaba-NLP/gte-multilingual-base
# (it ships custom model code on the Hub) — without it this build step fails
# for the default RAG_EMBEDDINGS choice. Harmless for the other mapped models.
model = SentenceTransformer(hf_id, cache_folder=cache_root, device="cpu",
                            trust_remote_code=True)  # download only
model.save(local_dir)
print("Prefetched SentenceTransformer:", hf_id)
PY
# 2) faster-whisper: prefetch the CPU weights; the runtime can still choose
#    its own device / compute type.
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel

name = os.environ.get("STT_MODEL", "small")
cache_root = os.path.join(os.environ.get("XDG_CACHE_HOME", "/opt/cache"), "whisper")
os.makedirs(cache_root, exist_ok=True)
_ = WhisperModel(name, device="cpu", compute_type="int8", download_root=cache_root)
print("Prefetched faster-whisper:", name, "->", cache_root)
PY

# (optional) piper TTS is intentionally skipped here; can be added later
# ===== App code =====
# One multi-source COPY instead of seven single-file layers: fewer layers,
# and a change to any source file invalidates only everything below here.
COPY app.py \
     queue_helper.py \
     agent_repo.py \
     windowing_utils.py \
     smart_rag.py \
     llm_client.py \
     web_search.py \
     ./

# NOTE(review): the container still runs as root; consider adding a non-root
# USER once the runtime write paths (/opt/cache etc.) are confirmed.
EXPOSE 8080
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]