diff --git a/agent_repo.py b/agent_repo.py
index a7913c3..eb14552 100644
--- a/agent_repo.py
+++ b/agent_repo.py
@@ -173,7 +173,7 @@ except Exception:
 logger = logging.getLogger("agent_repo")
 
 # ---------- Omgeving / Config ----------
-GITEA_URL = os.environ.get("GITEA_URL", "http://localhost:3080").rstrip("/")
+GITEA_URL = os.environ.get("GITEA_URL", "http://10.25.138.40:30085").rstrip("/")
 GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "8bdbe18dd2ec93ecbf9cd0a8f01a6eadf9cfa87d")
 GITEA_API = os.environ.get("GITEA_API", f"{GITEA_URL}/api/v1").rstrip("/")
 AGENT_DEFAULT_BRANCH = os.environ.get("AGENT_DEFAULT_BRANCH", "main")
@@ -188,7 +188,7 @@ AGENT_CLARIFY_THRESHOLD = float(os.environ.get("AGENT_CLARIFY_THRESHOLD", "0.6")
 
 
 # Meilisearch (optioneel)
-MEILI_URL = os.environ.get("MEILI_URL", "http://localhost:7700").strip()
+MEILI_URL = os.environ.get("MEILI_URL", "http://192.168.100.1:7700").strip()
 MEILI_KEY = os.environ.get("MEILI_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ").strip()
 MEILI_INDEX_PREFIX = os.environ.get("MEILI_INDEX_PREFIX", "code").strip()
 
@@ -377,7 +377,7 @@ def _qdrant_query(collection_name: str, query: str, n_results: int, where: Dict[
     Filter, FieldCondition, MatchValue = _qdrant_models
     # Let op: je hebt hier *ook* een embedder nodig (client-side). In dit skeleton verwachten we dat
     # je server-side search by text hebt geconfigureerd. Anders: voeg hier je embedder toe.
-    client = _qdrant(host=os.getenv("QDRANT_HOST","localhost"), port=int(os.getenv("QDRANT_PORT","6333")))
+    client = _qdrant(host=os.getenv("QDRANT_HOST","192.168.100.1"), port=int(os.getenv("QDRANT_PORT","6333")))
     # Eenvoudig: text search (als ingeschakeld). Anders: raise en laat de mock fallback pakken.
     try:
         must: List[Any] = []
@@ -4245,7 +4245,7 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
                 pass
         st.stage = "ASK"
         base = ("Ik verken de code en doe een voorstel. Geef de repo (bv. `admin/image-viewing-website` of "
-                "`http://localhost:3080/admin/image-viewing-website.git`). "
+                "`http://10.25.138.40:30085/admin/image-viewing-website.git`). "
                 "Of zeg: **'zoek repo'** als ik zelf moet zoeken.")
         return _with_preview(base, st)
 
diff --git a/app.py b/app.py
index 67ecc57..1a9574f 100644
--- a/app.py
+++ b/app.py
@@ -294,6 +294,87 @@ def detect_toolcalls_any(text: str) -> list[dict]:
             }]
     return []
 
+def _coerce_text_toolcalls_to_openai(data: dict) -> dict:
+    """Convert tool calls that an upstream LLM emitted as plain text into native ones.
+
+    Some backends put e.g. ``[TOOL_CALLS] [{"name": ..., "arguments": ...}]`` in
+    ``choices[0].message.content``. When such text is recognised, ``data`` is
+    modified in place: ``message.tool_calls`` is filled, ``message.content`` is set
+    to ``None`` and ``finish_reason`` becomes ``"tool_calls"``. Responses that
+    already carry native ``tool_calls``, or that do not have the expected
+    structure, are returned untouched.
+
+    Args:
+        data: Decoded JSON body of an OpenAI-style chat completion.
+
+    Returns:
+        The same ``data`` object. Best-effort: on any unexpected error the
+        object is returned as it is at that point instead of raising.
+    """
+    try:
+        choices = data.get("choices") if isinstance(data, dict) else None
+        if not choices or not isinstance(choices, list):
+            return data
+        ch0 = choices[0]
+        msg = ch0.get("message") if isinstance(ch0, dict) else None
+        # Leave malformed messages and existing native tool_calls alone.
+        if not isinstance(msg, dict) or msg.get("tool_calls"):
+            return data
+        content = msg.get("content")
+        s = content.strip() if isinstance(content, str) else ""
+        if not s:
+            return data
+        # Only attempt parsing when there are clear signals of a text tool call.
+        if not (s.startswith("[") or any(t in s for t in ("[TOOL_CALLS]", "call_tool", "tool_calls"))):
+            return data
+
+        calls = detect_toolcalls_any(s) or []
+        if not calls:
+            # vLLM/[TOOL_CALLS] style: usually a JSON array follows the tag.
+            s2 = re.sub(r"^\s*\[TOOL_CALLS\]\s*", "", s, flags=re.I)
+            try:
+                s2 = html.unescape(s2)
+            except Exception:
+                pass
+            m = re.search(r"\[[\s\S]*\]", s2)
+            try:
+                arr = json.loads(m.group(0)) if m else None
+            except Exception:
+                arr = None
+            for it in (arr if isinstance(arr, list) else []):
+                if not isinstance(it, dict):
+                    continue
+                name = it.get("name")
+                args = it.get("arguments", {})
+                if not name and isinstance(it.get("function"), dict):
+                    name = it["function"].get("name")
+                    args = it["function"].get("arguments", args)
+                if isinstance(args, str):
+                    try:
+                        args = json.loads(args)
+                    except Exception:
+                        args = {"input": args}
+                # function.arguments must encode a JSON object: wrap lists/scalars/null.
+                if not isinstance(args, dict):
+                    args = {} if args is None else {"input": args}
+                # Skip entries whose name is missing or not a string.
+                if isinstance(name, str) and name:
+                    calls.append({
+                        "id": f"call_{uuid.uuid4().hex[:8]}",
+                        "type": "function",
+                        "function": {"name": name, "arguments": json.dumps(args, ensure_ascii=False)}
+                    })
+
+        if calls:
+            msg["role"] = msg.get("role") or "assistant"
+            msg["content"] = None
+            msg["tool_calls"] = calls
+            ch0["finish_reason"] = "tool_calls"
+        return data
+    except Exception:
+        return data
+
+
 # -----------------------------------------------------------------------------
 # App & logging
 # -----------------------------------------------------------------------------
@@ -384,7 +465,7 @@ async def log_requests(request: Request, call_next):
 # Config
 # -----------------------------------------------------------------------------
 MISTRAL_MODE = os.getenv("MISTRAL_MODE", "v1").lower()
-LLM_URL = os.getenv("LLM_URL", "http://localhost:8000/v1/chat/completions").strip()
+LLM_URL = os.getenv("LLM_URL", "http://192.168.100.1:8000/v1/chat/completions").strip()
 RAW_URL = os.getenv("MISTRAL_URL_RAW", "http://host.docker.internal:8000/completion").strip()
 LLM_CONNECT_TIMEOUT = float(os.getenv("LLM_CONNECT_TIMEOUT", "10"))
 LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200"))
@@ -392,7 +473,7 @@ LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200"))
 _UPSTREAM_URLS = [u.strip() for u in os.getenv("LLM_UPSTREAMS","").split(",") if u.strip()]
 
 # ==== Meilisearch (optioneel) ====
-MEILI_URL      = os.getenv("MEILI_URL", "http://localhost:7700").rstrip("/")
+MEILI_URL      = os.getenv("MEILI_URL", "http://192.168.100.1:7700").rstrip("/")
 MEILI_API_KEY  = os.getenv("MEILI_API_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ")
 MEILI_INDEX    = os.getenv("MEILI_INDEX", "code_chunks")
 MEILI_ENABLED  = bool(MEILI_URL)
@@ -485,7 +566,7 @@ if CELERY_ENABLED:
         celery_app = None
 
 # Git / repos
-GITEA_URL = os.environ.get("GITEA_URL", "http://localhost:3080").rstrip("/")
+GITEA_URL = os.environ.get("GITEA_URL", "http://10.25.138.40:30085").rstrip("/")
 REPO_PATH = os.environ.get("REPO_PATH", "/tmp/repos")
 
 # -----------------------------------------------------------------------------
@@ -868,7 +949,7 @@ async def llm_call_openai_compat(
     *,
     model: Optional[str] = None,
     stream: bool = False,
-    temperature: float = 0.2,
+    temperature: float = 0.02,
     top_p: float = 0.9,
     max_tokens: int = 42000,
     extra: Optional[dict] = None,
@@ -1100,7 +1181,7 @@ async def _svg_from_prompt(prompt: str, w: int, h: int, background: str="white")
             f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.")
     resp = await llm_call_openai_compat(
         [{"role":"system","content":sys},{"role":"user","content":user}],
-        stream=False, temperature=0.35, top_p=0.9, max_tokens=2048
+        stream=False, temperature=0.035, top_p=0.9, max_tokens=2048
     )
     svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
     return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background)
@@ -1668,7 +1749,7 @@ async def present_make(
             f"Max. {max_slides} dia's, 3–6 bullets per dia.")
     plan = await llm_call_openai_compat(
         [{"role":"system","content":sys},{"role":"user","content":user}],
-        stream=False, temperature=0.3, top_p=0.9, max_tokens=13021
+        stream=False, temperature=0.03, top_p=0.9, max_tokens=13021
     )
     raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}")
     try:
@@ -1712,7 +1793,7 @@ async def vision_ask(
     file: UploadFile = File(...),
     prompt: str = Form("Beschrijf kort wat je ziet."),
     stream: bool = Form(False),
-    temperature: float = Form(0.2),
+    temperature: float = Form(0.02),
     top_p: float = Form(0.9),
     max_tokens: int = Form(1024),
 ):
@@ -1743,7 +1824,7 @@ async def vision_and_text(
     stream: bool = Form(False),
     max_images: int = Form(6),
     max_chars: int = Form(25000),
-    temperature: float = Form(0.2),
+    temperature: float = Form(0.02),
     top_p: float = Form(0.9),
     max_tokens: int = Form(2048),
 ):
@@ -1801,7 +1882,7 @@ async def vision_health():
 # -------- Tool registry (OpenAI-style) --------
 LLM_FUNCTION_CALLING_MODE = os.getenv("LLM_FUNCTION_CALLING_MODE", "auto").lower()  # "native" | "shim" | "auto"
 
-OWUI_BASE_URL='http://localhost:3000'
+OWUI_BASE_URL='http://192.168.100.1:8089'
 OWUI_API_TOKEN='sk-f1b7991b054442b5ae388de905019726'
 # Aliassen zodat oudere codepaths blijven werken
 OWUI_BASE = OWUI_BASE_URL
@@ -1976,6 +2057,17 @@ async def t_run_shell(args: dict) -> dict:
 
 async def _execute_tool(name: str, args: dict) -> dict:
     logger.info("toolcall: "+str(name)+" ("+str(args)+")")
+    required=[]
+    if name in TOOLS_REGISTRY:
+        required=TOOLS_REGISTRY[name]["parameters"]["required"]
+    else:
+        return {"error": f"Unknown tool '{name}'."}
+    if not all(k in args and args[k] for k in required):
+        return {"error": f"Missing required arguments for tool '{name}'. Required: {required}"}
+    for k in args:
+        if k in required:
+            if args[k] in ['',None]:
+                return {"error": f"Missing required arguments for tool '{name}'. Required: {required}"}
     if name == "repo_grep":
         repo_url = args.get("repo_url","")
         branch   = args.get("branch","main")
@@ -2050,26 +2142,33 @@ async def _execute_tool(name: str, args: dict) -> dict:
         })
         return out
     if name == "rag_query":
-        out= await run_in_threadpool(_rag_index_repo_sync, **{
-            "repo_url": args.get("repo",""),
-            "branch": "main",
-            "profile": "auto",
-            "include": "",
-            "exclude_dirs": "",
-            "chunk_chars": 3000,
-            "overlap": 400,
-            "collection_name": "code_docs",
-            "force": False,
-        })
-        out = await rag_query_api(
-            query=args.get("query",""),
-            n_results=int(args.get("n_results",5)),
-            collection_name=_norm_collection_name(args.get("collection_name","code_docs"), "code_docs"),
-            repo=args.get("repo"),
-            path_contains=args.get("path_contains"),
-            profile=args.get("profile")
-        )
-        return out
+        try:
+            out= await run_in_threadpool(_rag_index_repo_sync, **{
+                "repo_url": args.get("repo",""),
+                "branch": "main",
+                "profile": "auto",
+                "include": "",
+                "exclude_dirs": "",
+                "chunk_chars": 3000,
+                "overlap": 400,
+                "collection_name": "code_docs",
+                "force": False,
+            })
+        except Exception as e:
+            return {"error": f"Error for functioncall '{name}', while doing repo_index. errortext: {str(e)}"}
+        try:
+            out = await rag_query_api(
+                query=args.get("query",""),
+                n_results=int(args.get("n_results",5)),
+                collection_name=_norm_collection_name(args.get("collection_name","code_docs"), "code_docs"),
+                repo=args.get("repo"),
+                path_contains=args.get("path_contains"),
+                profile=args.get("profile")
+            )
+            return out
+        except Exception as e:
+            return {"error": f"Error for functioncall '{name}', while doing repo_query. errortext: {str(e)}"}
+
 
     # Console tools
     if name == "run_shell":
@@ -2080,7 +2179,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
     # Repo
     if name == "repo_qa":
         # High-level QA over een specifieke repo.
-        out=json.dumps(await repo_qa_answer(repo_hint=args.get("repo"),question=args.get("question"),branch=args.get("branch","main"),n_ctx=10), ensure_ascii=False)
+        out=json.dumps(await repo_qa_answer(repo_hint=args.get("repo").replace('"','').replace("'",""),question=args.get("question"),branch=args.get("branch","main"),n_ctx=10), ensure_ascii=False)
         return out
         
     # Web tools
@@ -2203,7 +2302,7 @@ TOOLS_REGISTRY = {
                 "repo_url":{"type":"string"},
                 "branch":{"type":"string","default":"main"},
                 "query":{"type":"string"},
-                "max_hits":{"type":"integer","default":200}
+                "max_hits":{"type":"integer","default":10}
             },
             "required":["repo_url","query"]
         }
@@ -2250,7 +2349,7 @@ TOOLS_REGISTRY = {
                 "path_contains":{"type":["string","null"]},
                 "profile":{"type":["string","null"]}
             },
-            "required":["query"]
+            "required":["query","repo"]
         }
     },
     "web_search_xng": {
@@ -2286,7 +2385,7 @@ TOOLS_REGISTRY = {
                 "repo":{"type":"string"},
                 "question":{"type":"string"},
                 "branch":{"type":"string"},
-        },"required":["repo_hint","question"]}
+        },"required":["repo","question"]}
     },
     "summarize_text": {
         "description": "Vat tekst samen in bullets met inleiding en actiepunten.",
@@ -2490,7 +2589,7 @@ async def llm_call_autocont(
     *,
     model: Optional[str] = None,
     stream: bool = False,
-    temperature: float = 0.2,
+    temperature: float = 0.02,
     top_p: float = 0.9,
     max_tokens: int = 1024,
     extra: Optional[dict] = None,
@@ -2581,7 +2680,8 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
     stream = bool(body.get("stream", False))
     raw_messages = body.get("messages") or []
     # normaliseer tool-berichten naar plain tekst voor het LLM
-    if False:
+    NORMALIZE_TOOL_MESSAGES = os.getenv("NORMALIZE_TOOL_MESSAGES", "0").lower() not in ("0","false","no")
+    if NORMALIZE_TOOL_MESSAGES:
         norm_messages = []
         for m in raw_messages:
             if m.get("role") == "tool":
@@ -2616,7 +2716,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
         logger.info("🧰 tools_count=%s, tool_choice=%s", len(tools), tool_choice_req)
     except Exception:
         pass
-    if not stream:
+    if RUN_BRIDGE and not stream:
 
         # OWUI stuurt vaak "required" als: "er MOET een tool worden gebruikt".
         # Als er precies 1 tool is meegegeven, normaliseren we dat naar "force deze tool".
@@ -2648,7 +2748,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                 client = app.state.HTTPX
                 r = await client.post(LLM_URL, json=passthrough)
                 try:
-                    return JSONResponse(r.json(), status_code=r.status_code)
+                    data = r.json()
+                    data = _coerce_text_toolcalls_to_openai(data)
+                    return JSONResponse(data, status_code=r.status_code)
                 except Exception:
                     return PlainTextResponse(r.text, status_code=r.status_code)
 
@@ -2700,7 +2802,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                     client = app.state.HTTPX
                     r = await client.post(LLM_URL, json=passthrough)
                     try:
-                        return JSONResponse(r.json(), status_code=r.status_code)
+                        data = r.json()
+                        data = _coerce_text_toolcalls_to_openai(data)
+                        return JSONResponse(data, status_code=r.status_code)
                     except Exception:
                         return PlainTextResponse(r.text, status_code=r.status_code)
 
@@ -2849,6 +2953,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
         if LLM_FUNCTION_CALLING_MODE in ("native","auto") and stream:
             passthrough = dict(body); passthrough["messages"]=messages
             if images_b64: passthrough["images"]=images_b64
+            STREAM_TOOLCALL_COERCE = os.getenv("STREAM_TOOLCALL_COERCE","1").lower() not in ("0","false","no")
             async def _aiter():
                 import asyncio, contextlib
                 client = app.state.HTTPX
@@ -2867,6 +2972,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                             await q.put(b"__EOF__")
                     reader_task = asyncio.create_task(_reader())
                     try:
+                        buf = ""
+                        acc = ""
+                        suppress = False
                         while True:
                             try:
                                 chunk = await asyncio.wait_for(q.get(), timeout=HEARTBEAT)
@@ -2875,7 +2983,91 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                                 continue
                             if chunk == b"__EOF__":
                                 break
-                            yield chunk
+                            if not STREAM_TOOLCALL_COERCE:
+                                yield chunk
+                                continue
+                            try:
+                                buf += chunk.decode("utf-8", errors="ignore")
+                            except Exception:
+                                yield chunk
+                                continue
+                            # SSE events zijn gescheiden door een lege regel
+                            while "\n\n" in buf:
+                                event, buf = buf.split("\n\n", 1)
+                                if not event:
+                                    continue
+                                if event.startswith(":"):
+                                    yield (event + "\n\n").encode("utf-8")
+                                    continue
+                                lines = event.splitlines()
+                                data_lines = [ln[5:].lstrip() for ln in lines if ln.startswith("data:")]
+                                if not data_lines:
+                                    if not suppress:
+                                        yield (event + "\n\n").encode("utf-8")
+                                    continue
+                                data_s = "\n".join(data_lines).strip()
+                                if data_s == "[DONE]":
+                                    yield b"data: [DONE]\n\n"
+                                    return
+                                try:
+                                    obj = json.loads(data_s)
+                                except Exception:
+                                    if not suppress:
+                                        yield (event + "\n\n").encode("utf-8")
+                                    continue
+                                try:
+                                    ch0 = (obj.get("choices") or [{}])[0] or {}
+                                    delta = ch0.get("delta") or {}
+                                except Exception:
+                                    delta = {}
+                                # Als upstream al echte tool_calls streamt: pass-through
+                                if isinstance(delta, dict) and delta.get("tool_calls"):
+                                    if not suppress:
+                                        yield ("data: " + json.dumps(obj, ensure_ascii=False) + "\n\n").encode("utf-8")
+                                    continue
+                                content = (delta.get("content") if isinstance(delta, dict) else None)
+                                if isinstance(content, str) and content:
+                                    acc += content
+                                    if "[TOOL_CALLS" in acc:
+                                        suppress = True  # onderdruk de text-tag stream
+                                    calls = detect_toolcalls_any(acc) or []
+                                    if calls:
+                                        created = int(time.time())
+                                        chunk_id = obj.get("id") or f"chatcmpl-{uuid.uuid4().hex[:24]}"
+                                        model_name = obj.get("model") or body.get("model") or "unknown"
+                                        tc_delta = []
+                                        for i, tc in enumerate(calls):
+                                            tcc = dict(tc)
+                                            tcc["index"] = i
+                                            tc_delta.append(tcc)
+                                        first = {
+                                            "id": chunk_id,
+                                            "object": "chat.completion.chunk",
+                                            "created": created,
+                                            "model": model_name,
+                                            "choices": [{
+                                                "index": 0,
+                                                "delta": {"role":"assistant", "tool_calls": tc_delta},
+                                                "finish_reason": None
+                                            }]
+                                        }
+                                        second = {
+                                            "id": chunk_id,
+                                            "object": "chat.completion.chunk",
+                                            "created": created,
+                                            "model": model_name,
+                                            "choices": [{
+                                                "index": 0,
+                                                "delta": {},
+                                                "finish_reason": "tool_calls"
+                                            }]
+                                        }
+                                        yield ("data: " + json.dumps(first, ensure_ascii=False) + "\n\n").encode("utf-8")
+                                        yield ("data: " + json.dumps(second, ensure_ascii=False) + "\n\n").encode("utf-8")
+                                        yield b"data: [DONE]\n\n"
+                                        return
+                                if not suppress:
+                                    yield ("data: " + json.dumps(obj, ensure_ascii=False) + "\n\n").encode("utf-8")
                     finally:
                         reader_task.cancel()
                         with contextlib.suppress(Exception):
@@ -2895,65 +3087,76 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                 if images_b64: passthrough["images"]=images_b64
                 r = await client.post(LLM_URL, json=passthrough)
                 try:
-                    return JSONResponse(r.json(), status_code=r.status_code)
+                    data = r.json()
+                    data = _coerce_text_toolcalls_to_openai(data)
+                    return JSONResponse(data, status_code=r.status_code)
                 except Exception:
                     return PlainTextResponse(r.text, status_code=r.status_code)
 
-            # (A) 1e call: vraag de LLM om tool_calls (geen stream)
-            first_req = dict(body)
-            first_req["messages"] = messages
-            first_req["stream"] = False
-            if images_b64: first_req["images"] = images_b64
-            r1 = await client.post(LLM_URL, json=first_req)
-            try:
-                data1 = r1.json()
-            except Exception:
-                return PlainTextResponse(r1.text, status_code=r1.status_code)
-            msg1 = ((data1.get("choices") or [{}])[0].get("message") or {})
-            tool_calls = msg1.get("tool_calls") or []
-            # Geen tool-calls? Geef direct door.
-            if not tool_calls:
-                return JSONResponse(data1, status_code=r1.status_code)
-
-            # (B) voer tool_calls lokaal uit
-            tool_msgs = []
-            for tc in tool_calls:
-                fn = ((tc or {}).get("function") or {})
-                tname = fn.get("name")
-                raw_args = fn.get("arguments") or "{}"
+            # Relay-modus: iteratief tools uitvoeren totdat de LLM stopt met tool_calls
+            max_rounds = int(os.getenv("LLM_TOOL_MAX_ROUNDS", "5"))
+            follow_messages = messages
+            last_status = None
+            for _round in range(max_rounds):
+                req_i = dict(body)
+                req_i["messages"] = follow_messages
+                req_i["stream"] = False
+                if images_b64: req_i["images"] = images_b64
+                r_i = await client.post(LLM_URL, json=req_i)
+                last_status = r_i.status_code
                 try:
-                    args = json.loads(raw_args) if isinstance(raw_args, str) else (raw_args or {})
+                    data_i = r_i.json()
                 except Exception:
-                    args = {}
-                if not tname or tname not in TOOLS_REGISTRY:
-                    out = {"error": f"Unknown tool '{tname}'"}
-                else:
-                    try:
-                        out = await _execute_tool(tname, args)
-                    except Exception as e:
-                        out = {"error": str(e)}
-                tool_msgs.append({
-                    "role": "tool",
-                    "tool_call_id": tc.get("id"),
-                    "name": tname or "unknown",
-                    "content": json.dumps(out, ensure_ascii=False)
-                })
+                    return PlainTextResponse(r_i.text, status_code=r_i.status_code)
+                msg_i = ((data_i.get("choices") or [{}])[0].get("message") or {})
+                tool_calls = msg_i.get("tool_calls") or []
+                # Fallback: sommige backends gooien toolcalls als tekst (bv. [TOOL_CALLS])
+                if not tool_calls:
+                    txt = (msg_i.get("content") or "")
+                    tool_calls = detect_toolcalls_any(txt) or []
+                # Geen tool-calls? Geef direct door.
+                if not tool_calls:
+                    data_i = _coerce_text_toolcalls_to_openai(data_i)
+                    return JSONResponse(data_i, status_code=r_i.status_code)
 
-            # (C) 2e call: geef tool outputs terug aan LLM voor eindantwoord
-            follow_messages = messages + [
-                {"role": "assistant", "tool_calls": tool_calls},
-                *tool_msgs
-            ]
-            second_req = dict(body)
-            second_req["messages"] = follow_messages
-            second_req["stream"] = False
-            # images opnieuw meesturen is niet nodig, maar kan geen kwaad:
-            if images_b64: second_req["images"] = images_b64
-            r2 = await client.post(LLM_URL, json=second_req)
-            try:
-                return JSONResponse(r2.json(), status_code=r2.status_code)
-            except Exception:
-                return PlainTextResponse(r2.text, status_code=r2.status_code)
+                # Tools uitvoeren
+                tool_msgs = []
+                for tc in tool_calls:
+                    # Normaliseer tc structuur
+                    tc_id = (tc or {}).get("id") or f"call_{uuid.uuid4().hex[:8]}"
+                    fn = ((tc or {}).get("function") or {})
+                    tname = fn.get("name")
+                    logger.info(f"Running tool: '{tname}'")
+                    raw_args = fn.get("arguments") or "{}"
+                    try:
+                        args = json.loads(raw_args) if isinstance(raw_args, str) else (raw_args or {})
+                    except Exception:
+                        args = {}
+                    if not tname or tname not in TOOLS_REGISTRY:
+                        out = {"error": f"Unknown tool '{tname}'"}
+                    else:
+                        try:
+                            out = await _execute_tool(tname, args)
+                        except Exception as e:
+                            out = {"error": str(e)}
+                    tool_msgs.append({
+                        "role": "tool",
+                        "tool_call_id": tc_id,
+                        "name": tname or "unknown",
+                        "content": json.dumps(out, ensure_ascii=False)
+                    })
+                    # Zorg dat assistant tool_calls een id heeft
+                    if isinstance(tc, dict) and not tc.get("id"):
+                        tc["id"] = tc_id
+
+                follow_messages = follow_messages + [
+                    {"role": "assistant", "tool_calls": tool_calls},
+                    *tool_msgs
+                ]
+
+            # Te veel tool-rondes → stop om loops te voorkomen
+            safe_msg = f"Te veel tool-rondes ({max_rounds}). Stop om loop te voorkomen."
+            return JSONResponse(_openai_chat_response(model, safe_msg, follow_messages), status_code=(last_status or 200))
 
         # shim (non-stream)
         if LLM_FUNCTION_CALLING_MODE == "shim" and not stream:
@@ -2967,7 +3170,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                    "Otherwise reply with ONLY: {\"final_answer\":\"...\"}\n\nTools:\n" + "\n".join(tool_lines))
             decide = await llm_call_openai_compat(
                 [{"role":"system","content":sys}] + messages,
-                stream=False, temperature=float(body.get("temperature",0.2)),
+                stream=False, temperature=float(body.get("temperature",0.02)),
                 top_p=float(body.get("top_p",0.9)), max_tokens=min(512, int(body.get("max_tokens",1024))),
                 extra=extra_payload if extra_payload else None
             )
@@ -3006,7 +3209,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
             ]
             final = await llm_call_openai_compat(
                 follow, stream=False,
-                temperature=float(body.get("temperature",0.2)),
+                temperature=float(body.get("temperature",0.02)),
                 top_p=float(body.get("top_p",0.9)),
                 max_tokens=int(body.get("max_tokens",1024)),
                 extra=extra_payload if extra_payload else None
@@ -3023,7 +3226,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
             LLM_WINDOWING_ENABLE = os.getenv("LLM_WINDOWING_ENABLE", "1").lower() not in ("0","false","no")
             MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "13021"))
             RESP_RESERVE   = int(os.getenv("LLM_RESPONSE_RESERVE", "1024"))
-            temperature = float(body.get("temperature", 0.2))
+            temperature = float(body.get("temperature", 0.02))
             top_p       = float(body.get("top_p", 0.9))
             # respecteer env-override voor default
             _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "1024"))
@@ -3051,7 +3254,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                             {"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."},
                             {"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."}
                         ]
-                        resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=300)
+                        resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=300)
                         return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "")
                     trimmed_stream_msgs = await win.build_within_budget(system_prompt=None, summarizer=_summarizer)
                     new_summary = getattr(win, "running_summary", running_summary)
@@ -3116,7 +3319,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
     else:
         # --- ÉCHTE streaming (geen tools): direct passthrough met heartbeats ---
         if stream:
-            temperature = float(body.get("temperature", 0.2))
+            temperature = float(body.get("temperature", 0.02))
             top_p       = float(body.get("top_p", 0.9))
             _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "13021"))
             max_tokens  = int(body.get("max_tokens", _default_max))
@@ -3138,7 +3341,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
     MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "42000"))
     RESP_RESERVE   = int(os.getenv("LLM_RESPONSE_RESERVE", "1024"))
     MAX_AUTOCONT   = int(os.getenv("LLM_AUTO_CONTINUES", "2"))
-    temperature = float(body.get("temperature", 0.2))
+    temperature = float(body.get("temperature", 0.02))
     top_p       = float(body.get("top_p", 0.9))
     # Laat env de default bepalen, zodat OWUI niet hard op 1024 blijft hangen
     _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "42000"))
@@ -3165,7 +3368,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
                     {"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."},
                     {"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."}
                 ]
-                resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=300)
+                resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=300)
                 return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "")
             trimmed = await win.build_within_budget(system_prompt=None, summarizer=_summarizer)
             new_summary = getattr(win, "running_summary", running_summary)
@@ -3309,7 +3512,7 @@ async def _summarize_files_llm(items: list[tuple[str, str]]) -> dict[str, str]:
             {"role":"user","content": f"Pad: {path}\n\nInhoud (ingekort):\n{snippet}\n\nAntwoord: "}
         ]
         try:
-            resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=64)
+            resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=64)
             summ = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content","").strip()
         except Exception:
             summ = ""
@@ -3807,14 +4010,19 @@ async def rag_query_api(
     collection_name_eff = _collection_effective(collection_name)
     col = _get_collection(collection_name_eff)
     q_emb = _EMBEDDER.embed_query(query)
-    where = {}
+    # Chroma: $and/$or must contain >=2 where-expressions, so a single condition is passed bare.
+    conds = []
     if repo:
-        # Accepteer zowel 'repo' (basename) als 'repo_full' (owner/repo)
-        base = repo.rsplit("/", 1)[-1]
-        where = {"$and": [
-            {"repo_full": {"$eq": repo}}
-        ]}
-    if profile: where["profile"] = {"$eq": profile}
+        conds.append({"repo_full": {"$eq": repo}})
+    if branch:
+        conds.append({"branch": {"$eq": branch}})
+    if profile:
+        conds.append({"profile": {"$eq": profile}})
+    where = None
+    if len(conds) == 1:
+        where = conds[0]
+    elif len(conds) >= 2:
+        where = {"$and": conds}
 
     # ---- symbol hit set (repo-scoped) ----
     sym_hit_keys: set[str] = set()
@@ -4113,7 +4321,7 @@ async def rag_query_api(
         resp = await llm_call_openai_compat(
             [{"role":"system","content":"You are precise and return only valid JSON."},
              {"role":"user","content": prompt+"\n\nOnly JSON array."}],
-            stream=False, temperature=0.0, top_p=1.0, max_tokens=1024
+            stream=False, temperature=0.01, top_p=1.0, max_tokens=1024
         )
         try:
             order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]"))
diff --git a/mistral-api.sh b/mistral-api.sh
index b0187e0..783f4d7 100755
--- a/mistral-api.sh
+++ b/mistral-api.sh
@@ -1 +1 @@
-docker run -d --rm --name mistral-api --network host -v /opt/SentenceTransformer:/opt/sentence-transformers -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=1 -e LLM_CONTEXT_TOKENS=16288 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=2 -e LLM_FUNCTION_CALLING_MODE=shim -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 mistral-api
+docker run -d --rm --name mistral-api --network host -v /opt/SentenceTransformer:/opt/sentence-transformers -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=0 -e LLM_CONTEXT_TOKENS=42000 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=0 -e LLM_FUNCTION_CALLING_MODE=auto -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 -e FORCE_ALL_TOOLS=0 -e AUTO_CONTINUE=0 -e STREAM_PREFER_DIRECT=1 mistral-api