diff --git a/agent_repo.py b/agent_repo.py index a7913c3..eb14552 100644 --- a/agent_repo.py +++ b/agent_repo.py @@ -173,7 +173,7 @@ except Exception: logger = logging.getLogger("agent_repo") # ---------- Omgeving / Config ---------- -GITEA_URL = os.environ.get("GITEA_URL", "http://localhost:3080").rstrip("/") +GITEA_URL = os.environ.get("GITEA_URL", "http://10.25.138.40:30085").rstrip("/") GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "8bdbe18dd2ec93ecbf9cd0a8f01a6eadf9cfa87d") GITEA_API = os.environ.get("GITEA_API", f"{GITEA_URL}/api/v1").rstrip("/") AGENT_DEFAULT_BRANCH = os.environ.get("AGENT_DEFAULT_BRANCH", "main") @@ -188,7 +188,7 @@ AGENT_CLARIFY_THRESHOLD = float(os.environ.get("AGENT_CLARIFY_THRESHOLD", "0.6") # Meilisearch (optioneel) -MEILI_URL = os.environ.get("MEILI_URL", "http://localhost:7700").strip() +MEILI_URL = os.environ.get("MEILI_URL", "http://192.168.100.1:7700").strip() MEILI_KEY = os.environ.get("MEILI_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ").strip() MEILI_INDEX_PREFIX = os.environ.get("MEILI_INDEX_PREFIX", "code").strip() @@ -377,7 +377,7 @@ def _qdrant_query(collection_name: str, query: str, n_results: int, where: Dict[ Filter, FieldCondition, MatchValue = _qdrant_models # Let op: je hebt hier *ook* een embedder nodig (client-side). In dit skeleton verwachten we dat # je server-side search by text hebt geconfigureerd. Anders: voeg hier je embedder toe. - client = _qdrant(host=os.getenv("QDRANT_HOST","localhost"), port=int(os.getenv("QDRANT_PORT","6333"))) + client = _qdrant(host=os.getenv("QDRANT_HOST","192.168.100.1"), port=int(os.getenv("QDRANT_PORT","6333"))) # Eenvoudig: text search (als ingeschakeld). Anders: raise en laat de mock fallback pakken. try: must: List[Any] = [] @@ -4245,7 +4245,7 @@ async def handle_repo_agent(messages: List[dict], request) -> str: pass st.stage = "ASK" base = ("Ik verken de code en doe een voorstel. Geef de repo (bv. `admin/image-viewing-website` of " - "`http://localhost:3080/admin/image-viewing-website.git`). " + "`http://10.25.138.40:30085/admin/image-viewing-website.git`). " "Of zeg: **'zoek repo'** als ik zelf moet zoeken.") return _with_preview(base, st) diff --git a/app.py b/app.py index 67ecc57..1a9574f 100644 --- a/app.py +++ b/app.py @@ -294,6 +294,87 @@ def detect_toolcalls_any(text: str) -> list[dict]: }] return [] +def _coerce_text_toolcalls_to_openai(data: dict) -> dict: + """Als een upstream LLM tool-calls als tekst (bv. '[TOOL_CALLS] ...') teruggeeft, + zet dit om naar OpenAI-native choices[0].message.tool_calls zodat OpenWebUI tools kan runnen. + Laat bestaande tool_calls ongemoeid. + """ + try: + if not isinstance(data, dict): + return data + choices = data.get("choices") or [] + if not choices or not isinstance(choices, list): + return data + ch0 = choices[0] or {} + if not isinstance(ch0, dict): + return data + msg = ch0.get("message") or {} + if not isinstance(msg, dict): + return data + # native tool_calls bestaan al → niets doen + if msg.get("tool_calls"): + return data + + content = msg.get("content") + if not isinstance(content, str): + return data + s = content.strip() + if not s: + return data + + # Alleen proberen als er duidelijke signalen zijn + if ("[TOOL_CALLS]" not in s) and (not s.lstrip().startswith("[")) and ("call_tool" not in s) and ("tool_calls" not in s): + return data + + calls = detect_toolcalls_any(s) or [] + if not calls: + # vLLM/[TOOL_CALLS] stijl: vaak een JSON array na de tag + s2 = re.sub(r"^\s*\[TOOL_CALLS\]\s*", "", s, flags=re.I) + try: + s2 = html.unescape(s2) + except Exception: + pass + m = re.search(r"\[[\s\S]*\]", s2) + arr = None + if m: + try: + arr = json.loads(m.group(0)) + except Exception: + arr = None + if isinstance(arr, list): + calls = [] + for it in arr: + if not isinstance(it, dict): + continue + name = it.get("name") + args = it.get("arguments", {}) + if not name and isinstance(it.get("function"), dict): + name = it["function"].get("name") + args = it["function"].get("arguments", args) + if isinstance(args, str): + try: + args = json.loads(args) + except Exception: + args = {"input": args} + if name: + calls.append({ + "id": f"call_{uuid.uuid4().hex[:8]}", + "type": "function", + "function": {"name": name, "arguments": json.dumps(args, ensure_ascii=False)} + }) + + if calls: + msg["role"] = msg.get("role") or "assistant" + msg["content"] = None + msg["tool_calls"] = calls + ch0["message"] = msg + ch0["finish_reason"] = "tool_calls" + data["choices"][0] = ch0 + return data + except Exception: + return data + + # ----------------------------------------------------------------------------- # App & logging # ----------------------------------------------------------------------------- @@ -384,7 +465,7 @@ async def log_requests(request: Request, call_next): # Config # ----------------------------------------------------------------------------- MISTRAL_MODE = os.getenv("MISTRAL_MODE", "v1").lower() -LLM_URL = os.getenv("LLM_URL", "http://localhost:8000/v1/chat/completions").strip() +LLM_URL = os.getenv("LLM_URL", "http://192.168.100.1:8000/v1/chat/completions").strip() RAW_URL = os.getenv("MISTRAL_URL_RAW", "http://host.docker.internal:8000/completion").strip() LLM_CONNECT_TIMEOUT = float(os.getenv("LLM_CONNECT_TIMEOUT", "10")) LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200")) @@ -392,7 +473,7 @@ LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200")) _UPSTREAM_URLS = [u.strip() for u in os.getenv("LLM_UPSTREAMS","").split(",") if u.strip()] # ==== Meilisearch (optioneel) ==== -MEILI_URL = os.getenv("MEILI_URL", "http://localhost:7700").rstrip("/") +MEILI_URL = os.getenv("MEILI_URL", "http://192.168.100.1:7700").rstrip("/") MEILI_API_KEY = os.getenv("MEILI_API_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ") MEILI_INDEX = os.getenv("MEILI_INDEX", "code_chunks") MEILI_ENABLED = bool(MEILI_URL) @@ -485,7 +566,7 @@ if CELERY_ENABLED: celery_app = None # Git / repos -GITEA_URL = os.environ.get("GITEA_URL", "http://localhost:3080").rstrip("/") +GITEA_URL = os.environ.get("GITEA_URL", "http://10.25.138.40:30085").rstrip("/") REPO_PATH = os.environ.get("REPO_PATH", "/tmp/repos") # ----------------------------------------------------------------------------- @@ -868,7 +949,7 @@ async def llm_call_openai_compat( *, model: Optional[str] = None, stream: bool = False, - temperature: float = 0.2, + temperature: float = 0.02, top_p: float = 0.9, max_tokens: int = 42000, extra: Optional[dict] = None, @@ -1100,7 +1181,7 @@ async def _svg_from_prompt(prompt: str, w: int, h: int, background: str="white") f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.") resp = await llm_call_openai_compat( [{"role":"system","content":sys},{"role":"user","content":user}], - stream=False, temperature=0.35, top_p=0.9, max_tokens=2048 + stream=False, temperature=0.035, top_p=0.9, max_tokens=2048 ) svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","") return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background) @@ -1668,7 +1749,7 @@ async def present_make( f"Max. {max_slides} dia's, 3–6 bullets per dia.") plan = await llm_call_openai_compat( [{"role":"system","content":sys},{"role":"user","content":user}], - stream=False, temperature=0.3, top_p=0.9, max_tokens=13021 + stream=False, temperature=0.03, top_p=0.9, max_tokens=13021 ) raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}") try: @@ -1712,7 +1793,7 @@ async def vision_ask( file: UploadFile = File(...), prompt: str = Form("Beschrijf kort wat je ziet."), stream: bool = Form(False), - temperature: float = Form(0.2), + temperature: float = Form(0.02), top_p: float = Form(0.9), max_tokens: int = Form(1024), ): @@ -1743,7 +1824,7 @@ async def vision_and_text( stream: bool = Form(False), max_images: int = Form(6), max_chars: int = Form(25000), - temperature: float = Form(0.2), + temperature: float = Form(0.02), top_p: float = Form(0.9), max_tokens: int = Form(2048), ): @@ -1801,7 +1882,7 @@ async def vision_health(): # -------- Tool registry (OpenAI-style) -------- LLM_FUNCTION_CALLING_MODE = os.getenv("LLM_FUNCTION_CALLING_MODE", "auto").lower() # "native" | "shim" | "auto" -OWUI_BASE_URL='http://localhost:3000' +OWUI_BASE_URL='http://192.168.100.1:8089' OWUI_API_TOKEN='sk-f1b7991b054442b5ae388de905019726' # Aliassen zodat oudere codepaths blijven werken OWUI_BASE = OWUI_BASE_URL @@ -1976,6 +2057,17 @@ async def t_run_shell(args: dict) -> dict: async def _execute_tool(name: str, args: dict) -> dict: logger.info("toolcall: "+str(name)+" ("+str(args)+")") + required=[] + if name in TOOLS_REGISTRY: + required=TOOLS_REGISTRY[name]["parameters"]["required"] + else: + return {"error": f"Unknown tool '{name}'."} + if not all(k in args and args[k] for k in required): + return {"error": f"Missing required arguments for tool '{name}'. Required: {required}"} + for k in args: + if k in required: + if args[k] in ['',None]: + return {"error": f"Missing required arguments for tool '{name}'. Required: {required}"} if name == "repo_grep": repo_url = args.get("repo_url","") branch = args.get("branch","main") @@ -2050,26 +2142,33 @@ async def _execute_tool(name: str, args: dict) -> dict: }) return out if name == "rag_query": - out= await run_in_threadpool(_rag_index_repo_sync, **{ - "repo_url": args.get("repo",""), - "branch": "main", - "profile": "auto", - "include": "", - "exclude_dirs": "", - "chunk_chars": 3000, - "overlap": 400, - "collection_name": "code_docs", - "force": False, - }) - out = await rag_query_api( - query=args.get("query",""), - n_results=int(args.get("n_results",5)), - collection_name=_norm_collection_name(args.get("collection_name","code_docs"), "code_docs"), - repo=args.get("repo"), - path_contains=args.get("path_contains"), - profile=args.get("profile") - ) - return out + try: + out= await run_in_threadpool(_rag_index_repo_sync, **{ + "repo_url": args.get("repo",""), + "branch": "main", + "profile": "auto", + "include": "", + "exclude_dirs": "", + "chunk_chars": 3000, + "overlap": 400, + "collection_name": "code_docs", + "force": False, + }) + except Exception as e: + return {"error": f"Error for functioncall '{name}', while doing repo_index. errortext: {str(e)}"} + try: + out = await rag_query_api( + query=args.get("query",""), + n_results=int(args.get("n_results",5)), + collection_name=_norm_collection_name(args.get("collection_name","code_docs"), "code_docs"), + repo=args.get("repo"), + path_contains=args.get("path_contains"), + profile=args.get("profile") + ) + return out + except Exception as e: + return {"error": f"Error for functioncall '{name}', while doing repo_query. errortext: {str(e)}"} + # Console tools if name == "run_shell": @@ -2080,7 +2179,7 @@ async def _execute_tool(name: str, args: dict) -> dict: # Repo if name == "repo_qa": # High-level QA over een specifieke repo. - out=json.dumps(await repo_qa_answer(repo_hint=args.get("repo"),question=args.get("question"),branch=args.get("branch","main"),n_ctx=10), ensure_ascii=False) + out=json.dumps(await repo_qa_answer(repo_hint=args.get("repo").replace('"','').replace("'",""),question=args.get("question"),branch=args.get("branch","main"),n_ctx=10), ensure_ascii=False) return out # Web tools @@ -2203,7 +2302,7 @@ TOOLS_REGISTRY = { "repo_url":{"type":"string"}, "branch":{"type":"string","default":"main"}, "query":{"type":"string"}, - "max_hits":{"type":"integer","default":200} + "max_hits":{"type":"integer","default":10} }, "required":["repo_url","query"] } @@ -2250,7 +2349,7 @@ TOOLS_REGISTRY = { "path_contains":{"type":["string","null"]}, "profile":{"type":["string","null"]} }, - "required":["query"] + "required":["query","repo"] } }, "web_search_xng": { @@ -2286,7 +2385,7 @@ TOOLS_REGISTRY = { "repo":{"type":"string"}, "question":{"type":"string"}, "branch":{"type":"string"}, - },"required":["repo_hint","question"]} + },"required":["repo","question"]} }, "summarize_text": { "description": "Vat tekst samen in bullets met inleiding en actiepunten.", @@ -2490,7 +2589,7 @@ async def llm_call_autocont( *, model: Optional[str] = None, stream: bool = False, - temperature: float = 0.2, + temperature: float = 0.02, top_p: float = 0.9, max_tokens: int = 1024, extra: Optional[dict] = None, @@ -2581,7 +2680,8 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non stream = bool(body.get("stream", False)) raw_messages = body.get("messages") or [] # normaliseer tool-berichten naar plain tekst voor het LLM - if False: + NORMALIZE_TOOL_MESSAGES = os.getenv("NORMALIZE_TOOL_MESSAGES", "0").lower() not in ("0","false","no") + if NORMALIZE_TOOL_MESSAGES: norm_messages = [] for m in raw_messages: if m.get("role") == "tool": @@ -2616,7 +2716,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non logger.info("🧰 tools_count=%s, tool_choice=%s", len(tools), tool_choice_req) except Exception: pass - if not stream: + if RUN_BRIDGE and not stream: # OWUI stuurt vaak "required" als: "er MOET een tool worden gebruikt". # Als er precies 1 tool is meegegeven, normaliseren we dat naar "force deze tool". @@ -2648,7 +2748,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non client = app.state.HTTPX r = await client.post(LLM_URL, json=passthrough) try: - return JSONResponse(r.json(), status_code=r.status_code) + data = r.json() + data = _coerce_text_toolcalls_to_openai(data) + return JSONResponse(data, status_code=r.status_code) except Exception: return PlainTextResponse(r.text, status_code=r.status_code) @@ -2700,7 +2802,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non client = app.state.HTTPX r = await client.post(LLM_URL, json=passthrough) try: - return JSONResponse(r.json(), status_code=r.status_code) + data = r.json() + data = _coerce_text_toolcalls_to_openai(data) + return JSONResponse(data, status_code=r.status_code) except Exception: return PlainTextResponse(r.text, status_code=r.status_code) @@ -2849,6 +2953,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non if LLM_FUNCTION_CALLING_MODE in ("native","auto") and stream: passthrough = dict(body); passthrough["messages"]=messages if images_b64: passthrough["images"]=images_b64 + STREAM_TOOLCALL_COERCE = os.getenv("STREAM_TOOLCALL_COERCE","1").lower() not in ("0","false","no") async def _aiter(): import asyncio, contextlib client = app.state.HTTPX @@ -2867,6 +2972,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non await q.put(b"__EOF__") reader_task = asyncio.create_task(_reader()) try: + buf = "" + acc = "" + suppress = False while True: try: chunk = await asyncio.wait_for(q.get(), timeout=HEARTBEAT) @@ -2875,7 +2983,91 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non continue if chunk == b"__EOF__": break - yield chunk + if not STREAM_TOOLCALL_COERCE: + yield chunk + continue + try: + buf += chunk.decode("utf-8", errors="ignore") + except Exception: + yield chunk + continue + # SSE events zijn gescheiden door een lege regel + while "\n\n" in buf: + event, buf = buf.split("\n\n", 1) + if not event: + continue + if event.startswith(":"): + yield (event + "\n\n").encode("utf-8") + continue + lines = event.splitlines() + data_lines = [ln[5:].lstrip() for ln in lines if ln.startswith("data:")] + if not data_lines: + if not suppress: + yield (event + "\n\n").encode("utf-8") + continue + data_s = "\n".join(data_lines).strip() + if data_s == "[DONE]": + yield b"data: [DONE]\n\n" + return + try: + obj = json.loads(data_s) + except Exception: + if not suppress: + yield (event + "\n\n").encode("utf-8") + continue + try: + ch0 = (obj.get("choices") or [{}])[0] or {} + delta = ch0.get("delta") or {} + except Exception: + delta = {} + # Als upstream al echte tool_calls streamt: pass-through + if isinstance(delta, dict) and delta.get("tool_calls"): + if not suppress: + yield ("data: " + json.dumps(obj, ensure_ascii=False) + "\n\n").encode("utf-8") + continue + content = (delta.get("content") if isinstance(delta, dict) else None) + if isinstance(content, str) and content: + acc += content + if "[TOOL_CALLS" in acc: + suppress = True # onderdruk de text-tag stream + calls = detect_toolcalls_any(acc) or [] + if calls: + created = int(time.time()) + chunk_id = obj.get("id") or f"chatcmpl-{uuid.uuid4().hex[:24]}" + model_name = obj.get("model") or body.get("model") or "unknown" + tc_delta = [] + for i, tc in enumerate(calls): + tcc = dict(tc) + tcc["index"] = i + tc_delta.append(tcc) + first = { + "id": chunk_id, + "object": "chat.completion.chunk", + "created": created, + "model": model_name, + "choices": [{ + "index": 0, + "delta": {"role":"assistant", "tool_calls": tc_delta}, + "finish_reason": None + }] + } + second = { + "id": chunk_id, + "object": "chat.completion.chunk", + "created": created, + "model": model_name, + "choices": [{ + "index": 0, + "delta": {}, + "finish_reason": "tool_calls" + }] + } + yield ("data: " + json.dumps(first, ensure_ascii=False) + "\n\n").encode("utf-8") + yield ("data: " + json.dumps(second, ensure_ascii=False) + "\n\n").encode("utf-8") + yield b"data: [DONE]\n\n" + return + if not suppress: + yield ("data: " + json.dumps(obj, ensure_ascii=False) + "\n\n").encode("utf-8") finally: reader_task.cancel() with contextlib.suppress(Exception): @@ -2895,65 +3087,76 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non if images_b64: passthrough["images"]=images_b64 r = await client.post(LLM_URL, json=passthrough) try: - return JSONResponse(r.json(), status_code=r.status_code) + data = r.json() + data = _coerce_text_toolcalls_to_openai(data) + return JSONResponse(data, status_code=r.status_code) except Exception: return PlainTextResponse(r.text, status_code=r.status_code) - # (A) 1e call: vraag de LLM om tool_calls (geen stream) - first_req = dict(body) - first_req["messages"] = messages - first_req["stream"] = False - if images_b64: first_req["images"] = images_b64 - r1 = await client.post(LLM_URL, json=first_req) - try: - data1 = r1.json() - except Exception: - return PlainTextResponse(r1.text, status_code=r1.status_code) - msg1 = ((data1.get("choices") or [{}])[0].get("message") or {}) - tool_calls = msg1.get("tool_calls") or [] - # Geen tool-calls? Geef direct door. - if not tool_calls: - return JSONResponse(data1, status_code=r1.status_code) - - # (B) voer tool_calls lokaal uit - tool_msgs = [] - for tc in tool_calls: - fn = ((tc or {}).get("function") or {}) - tname = fn.get("name") - raw_args = fn.get("arguments") or "{}" + # Relay-modus: iteratief tools uitvoeren totdat de LLM stopt met tool_calls + max_rounds = int(os.getenv("LLM_TOOL_MAX_ROUNDS", "5")) + follow_messages = messages + last_status = None + for _round in range(max_rounds): + req_i = dict(body) + req_i["messages"] = follow_messages + req_i["stream"] = False + if images_b64: req_i["images"] = images_b64 + r_i = await client.post(LLM_URL, json=req_i) + last_status = r_i.status_code try: - args = json.loads(raw_args) if isinstance(raw_args, str) else (raw_args or {}) + data_i = r_i.json() except Exception: - args = {} - if not tname or tname not in TOOLS_REGISTRY: - out = {"error": f"Unknown tool '{tname}'"} - else: - try: - out = await _execute_tool(tname, args) - except Exception as e: - out = {"error": str(e)} - tool_msgs.append({ - "role": "tool", - "tool_call_id": tc.get("id"), - "name": tname or "unknown", - "content": json.dumps(out, ensure_ascii=False) - }) + return PlainTextResponse(r_i.text, status_code=r_i.status_code) + msg_i = ((data_i.get("choices") or [{}])[0].get("message") or {}) + tool_calls = msg_i.get("tool_calls") or [] + # Fallback: sommige backends gooien toolcalls als tekst (bv. [TOOL_CALLS]) + if not tool_calls: + txt = (msg_i.get("content") or "") + tool_calls = detect_toolcalls_any(txt) or [] + # Geen tool-calls? Geef direct door. + if not tool_calls: + data_i = _coerce_text_toolcalls_to_openai(data_i) + return JSONResponse(data_i, status_code=r_i.status_code) - # (C) 2e call: geef tool outputs terug aan LLM voor eindantwoord - follow_messages = messages + [ - {"role": "assistant", "tool_calls": tool_calls}, - *tool_msgs - ] - second_req = dict(body) - second_req["messages"] = follow_messages - second_req["stream"] = False - # images opnieuw meesturen is niet nodig, maar kan geen kwaad: - if images_b64: second_req["images"] = images_b64 - r2 = await client.post(LLM_URL, json=second_req) - try: - return JSONResponse(r2.json(), status_code=r2.status_code) - except Exception: - return PlainTextResponse(r2.text, status_code=r2.status_code) + # Tools uitvoeren + tool_msgs = [] + for tc in tool_calls: + # Normaliseer tc structuur + tc_id = (tc or {}).get("id") or f"call_{uuid.uuid4().hex[:8]}" + fn = ((tc or {}).get("function") or {}) + tname = fn.get("name") + logger.info(f"Running tool: '{tname}'") + raw_args = fn.get("arguments") or "{}" + try: + args = json.loads(raw_args) if isinstance(raw_args, str) else (raw_args or {}) + except Exception: + args = {} + if not tname or tname not in TOOLS_REGISTRY: + out = {"error": f"Unknown tool '{tname}'"} + else: + try: + out = await _execute_tool(tname, args) + except Exception as e: + out = {"error": str(e)} + tool_msgs.append({ + "role": "tool", + "tool_call_id": tc_id, + "name": tname or "unknown", + "content": json.dumps(out, ensure_ascii=False) + }) + # Zorg dat assistant tool_calls een id heeft + if isinstance(tc, dict) and not tc.get("id"): + tc["id"] = tc_id + + follow_messages = follow_messages + [ + {"role": "assistant", "tool_calls": tool_calls}, + *tool_msgs + ] + + # Te veel tool-rondes → stop om loops te voorkomen + safe_msg = f"Te veel tool-rondes ({max_rounds}). Stop om loop te voorkomen." + return JSONResponse(_openai_chat_response(model, safe_msg, follow_messages), status_code=(last_status or 200)) # shim (non-stream) if LLM_FUNCTION_CALLING_MODE == "shim" and not stream: @@ -2967,7 +3170,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non "Otherwise reply with ONLY: {\"final_answer\":\"...\"}\n\nTools:\n" + "\n".join(tool_lines)) decide = await llm_call_openai_compat( [{"role":"system","content":sys}] + messages, - stream=False, temperature=float(body.get("temperature",0.2)), + stream=False, temperature=float(body.get("temperature",0.02)), top_p=float(body.get("top_p",0.9)), max_tokens=min(512, int(body.get("max_tokens",1024))), extra=extra_payload if extra_payload else None ) @@ -3006,7 +3209,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non ] final = await llm_call_openai_compat( follow, stream=False, - temperature=float(body.get("temperature",0.2)), + temperature=float(body.get("temperature",0.02)), top_p=float(body.get("top_p",0.9)), max_tokens=int(body.get("max_tokens",1024)), extra=extra_payload if extra_payload else None @@ -3023,7 +3226,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non LLM_WINDOWING_ENABLE = os.getenv("LLM_WINDOWING_ENABLE", "1").lower() not in ("0","false","no") MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "13021")) RESP_RESERVE = int(os.getenv("LLM_RESPONSE_RESERVE", "1024")) - temperature = float(body.get("temperature", 0.2)) + temperature = float(body.get("temperature", 0.02)) top_p = float(body.get("top_p", 0.9)) # respecteer env-override voor default _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "1024")) @@ -3051,7 +3254,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non {"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."}, {"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."} ] - resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=300) + resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=300) return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "") trimmed_stream_msgs = await win.build_within_budget(system_prompt=None, summarizer=_summarizer) new_summary = getattr(win, "running_summary", running_summary) @@ -3116,7 +3319,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non else: # --- ÉCHTE streaming (geen tools): direct passthrough met heartbeats --- if stream: - temperature = float(body.get("temperature", 0.2)) + temperature = float(body.get("temperature", 0.02)) top_p = float(body.get("top_p", 0.9)) _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "13021")) max_tokens = int(body.get("max_tokens", _default_max)) @@ -3138,7 +3341,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "42000")) RESP_RESERVE = int(os.getenv("LLM_RESPONSE_RESERVE", "1024")) MAX_AUTOCONT = int(os.getenv("LLM_AUTO_CONTINUES", "2")) - temperature = float(body.get("temperature", 0.2)) + temperature = float(body.get("temperature", 0.02)) top_p = float(body.get("top_p", 0.9)) # Laat env de default bepalen, zodat OWUI niet hard op 1024 blijft hangen _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "42000")) @@ -3165,7 +3368,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non {"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."}, {"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."} ] - resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=300) + resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=300) return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "") trimmed = await win.build_within_budget(system_prompt=None, summarizer=_summarizer) new_summary = getattr(win, "running_summary", running_summary) @@ -3309,7 +3512,7 @@ async def _summarize_files_llm(items: list[tuple[str, str]]) -> dict[str, str]: {"role":"user","content": f"Pad: {path}\n\nInhoud (ingekort):\n{snippet}\n\nAntwoord: "} ] try: - resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=64) + resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=64) summ = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content","").strip() except Exception: summ = "" @@ -3807,14 +4010,19 @@ async def rag_query_api( collection_name_eff = _collection_effective(collection_name) col = _get_collection(collection_name_eff) q_emb = _EMBEDDER.embed_query(query) - where = {} + # Chroma: $and/$or moet >=2 where-expressies bevatten. + conds = [] if repo: - # Accepteer zowel 'repo' (basename) als 'repo_full' (owner/repo) - base = repo.rsplit("/", 1)[-1] - where = {"$and": [ - {"repo_full": {"$eq": repo}} - ]} - if profile: where["profile"] = {"$eq": profile} + conds.append({"repo_full": {"$eq": repo}}) + if branch: + conds.append({"branch": {"$eq": branch}}) + if profile: + conds.append({"profile": {"$eq": profile}}) + where = None + if len(conds) == 1: + where = conds[0] + elif len(conds) >= 2: + where = {"$and": conds} # ---- symbol hit set (repo-scoped) ---- sym_hit_keys: set[str] = set() @@ -4113,7 +4321,7 @@ async def rag_query_api( resp = await llm_call_openai_compat( [{"role":"system","content":"You are precise and return only valid JSON."}, {"role":"user","content": prompt+"\n\nOnly JSON array."}], - stream=False, temperature=0.0, top_p=1.0, max_tokens=1024 + stream=False, temperature=0.01, top_p=1.0, max_tokens=1024 ) try: order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]")) diff --git a/mistral-api.sh b/mistral-api.sh index b0187e0..783f4d7 100755 --- a/mistral-api.sh +++ b/mistral-api.sh @@ -1 +1 @@ -docker run -d --rm --name mistral-api --network host -v /opt/SentenceTransformer:/opt/sentence-transformers -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=1 -e LLM_CONTEXT_TOKENS=16288 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=2 -e LLM_FUNCTION_CALLING_MODE=shim -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 mistral-api +docker run -d --rm --name mistral-api --network host -v /opt/SentenceTransformer:/opt/sentence-transformers -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=0 -e LLM_CONTEXT_TOKENS=42000 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=0 -e LLM_FUNCTION_CALLING_MODE=auto -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 -e FORCE_ALL_TOOLS=0 -e AUTO_CONTINUE=0 -e STREAM_PREFER_DIRECT=1 mistral-api