diff --git a/Dockerfile b/Dockerfile
index 3aae3d6..de84bdc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,7 +53,15 @@ mapping = {
 choice = os.environ.get("RAG_EMBEDDINGS","gte-multilingual").lower()
 hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
 # cache_folder respecteert SENTENCE_TRANSFORMERS_HOME/HF_HOME, maar we forceren expliciet:
-SentenceTransformer(hf_id, cache_folder=os.environ.get("SENTENCE_TRANSFORMERS_HOME","/opt/sentence-transformers"))
+cache_root = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
+local_dir = os.path.join(cache_root, "embedder")
+os.makedirs(cache_root, exist_ok=True)
+
+print("Downloading SentenceTransformer:", hf_id)
+# Let SentenceTransformer populate its own cache (HF_HOME etc.);
+# afterwards we save the final model into local_dir for offline loading.
+model = SentenceTransformer(hf_id, cache_folder=cache_root)
+model.save(local_dir)
 
 print("Prefetched SentenceTransformer:", hf_id)
 PY
diff --git a/app.py b/app.py
index de778ed..d2ca986 100644
--- a/app.py
+++ b/app.py
@@ -190,10 +190,18 @@ def _extract_tool_calls_from_text(txt: str):
             raw = obj["call_tool"]
             if isinstance(raw, dict):
                 raw = [raw]
+            if isinstance(raw, str):
+                # A bare string is a malformed tool-call payload; drop it.
+                print("Invalid toolcall:", str(raw))
+                raw = []
             tc = []
             for it in (raw or []):
-                name = (it or {}).get("name")
-                args = (it or {}).get("arguments") or {}
+                # Entries must be dicts; skip anything else instead of crashing.
+                if not isinstance(it, dict):
+                    print("Invalid toolcall entry:", str(it), "raw=", str(raw))
+                    continue
+                name = it.get("name")
+                args = it.get("arguments") or {}
                 if isinstance(args, str):
                     try: args = json.loads(args)
                     except Exception: pass
@@ -540,11 +548,23 @@ def _build_embedder() -> _Embedder:
         model_name, family, slug = mapping["bge-small"]
     else:
         model_name, family, slug = mapping[choice]
+    cache_dir = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
+    local_dir = os.path.join(cache_dir, "embedder")
     st_kwargs = {"device": "cpu"}
+    if os.path.isdir(local_dir):
+        # Model was prefetched into the image -> load it straight from disk.
+        model_source = local_dir
+        print(f"Loading SentenceTransformer from local dir: {local_dir}")
+    else:
+        # No local copy found -> fall back to the HF id + cache_folder.
+        model_source = model_name
+        st_kwargs["cache_folder"] = cache_dir
+        print(f"Local dir {local_dir} not found, falling back to HF model: {model_name}")
+
     try:
         if "trust_remote_code" in inspect.signature(SentenceTransformer).parameters:
             st_kwargs["trust_remote_code"] = True
-        model = SentenceTransformer(model_name, **st_kwargs)
+        model = SentenceTransformer(model_source, **st_kwargs)
         # optioneel: CPU thread-telling forceren
         try:
             thr = int(os.getenv("RAG_TORCH_THREADS", "0"))
@@ -554,8 +574,8 @@ def _build_embedder() -> _Embedder:
         except Exception:
             pass
         return _Embedder(slug=slug, family=family, model=model, device="cpu")
-    except Exception:
-        pass
+    except Exception as e:
+        print("ERROR building embedder:", str(e))
 
     # Fallback via Chroma embedding functions
     from chromadb.utils import embedding_functions as ef
@@ -2630,6 +2650,8 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
         })
 
     call = (obj.get("call_tool") or {})
+    if isinstance(call, str):
+        call = {"name": "Invalid_toolcall", "arguments": {}}
     tname = call.get("name")
     if tname not in TOOLS_REGISTRY:
         return JSONResponse(_openai_chat_response(model, f"Onbekende tool: {tname}", messages))