update
This commit is contained in:
parent
932144e798
commit
1aaf0d013a
@ -173,7 +173,7 @@ except Exception:
|
|||||||
logger = logging.getLogger("agent_repo")
|
logger = logging.getLogger("agent_repo")
|
||||||
|
|
||||||
# ---------- Omgeving / Config ----------
|
# ---------- Omgeving / Config ----------
|
||||||
GITEA_URL = os.environ.get("GITEA_URL", "http://localhost:3080").rstrip("/")
|
GITEA_URL = os.environ.get("GITEA_URL", "http://10.25.138.40:30085").rstrip("/")
|
||||||
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "8bdbe18dd2ec93ecbf9cd0a8f01a6eadf9cfa87d")
|
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "8bdbe18dd2ec93ecbf9cd0a8f01a6eadf9cfa87d")
|
||||||
GITEA_API = os.environ.get("GITEA_API", f"{GITEA_URL}/api/v1").rstrip("/")
|
GITEA_API = os.environ.get("GITEA_API", f"{GITEA_URL}/api/v1").rstrip("/")
|
||||||
AGENT_DEFAULT_BRANCH = os.environ.get("AGENT_DEFAULT_BRANCH", "main")
|
AGENT_DEFAULT_BRANCH = os.environ.get("AGENT_DEFAULT_BRANCH", "main")
|
||||||
@ -188,7 +188,7 @@ AGENT_CLARIFY_THRESHOLD = float(os.environ.get("AGENT_CLARIFY_THRESHOLD", "0.6")
|
|||||||
|
|
||||||
|
|
||||||
# Meilisearch (optioneel)
|
# Meilisearch (optioneel)
|
||||||
MEILI_URL = os.environ.get("MEILI_URL", "http://localhost:7700").strip()
|
MEILI_URL = os.environ.get("MEILI_URL", "http://192.168.100.1:7700").strip()
|
||||||
MEILI_KEY = os.environ.get("MEILI_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ").strip()
|
MEILI_KEY = os.environ.get("MEILI_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ").strip()
|
||||||
MEILI_INDEX_PREFIX = os.environ.get("MEILI_INDEX_PREFIX", "code").strip()
|
MEILI_INDEX_PREFIX = os.environ.get("MEILI_INDEX_PREFIX", "code").strip()
|
||||||
|
|
||||||
@ -377,7 +377,7 @@ def _qdrant_query(collection_name: str, query: str, n_results: int, where: Dict[
|
|||||||
Filter, FieldCondition, MatchValue = _qdrant_models
|
Filter, FieldCondition, MatchValue = _qdrant_models
|
||||||
# Let op: je hebt hier *ook* een embedder nodig (client-side). In dit skeleton verwachten we dat
|
# Let op: je hebt hier *ook* een embedder nodig (client-side). In dit skeleton verwachten we dat
|
||||||
# je server-side search by text hebt geconfigureerd. Anders: voeg hier je embedder toe.
|
# je server-side search by text hebt geconfigureerd. Anders: voeg hier je embedder toe.
|
||||||
client = _qdrant(host=os.getenv("QDRANT_HOST","localhost"), port=int(os.getenv("QDRANT_PORT","6333")))
|
client = _qdrant(host=os.getenv("QDRANT_HOST","192.168.100.1"), port=int(os.getenv("QDRANT_PORT","6333")))
|
||||||
# Eenvoudig: text search (als ingeschakeld). Anders: raise en laat de mock fallback pakken.
|
# Eenvoudig: text search (als ingeschakeld). Anders: raise en laat de mock fallback pakken.
|
||||||
try:
|
try:
|
||||||
must: List[Any] = []
|
must: List[Any] = []
|
||||||
@ -4245,7 +4245,7 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
|
|||||||
pass
|
pass
|
||||||
st.stage = "ASK"
|
st.stage = "ASK"
|
||||||
base = ("Ik verken de code en doe een voorstel. Geef de repo (bv. `admin/image-viewing-website` of "
|
base = ("Ik verken de code en doe een voorstel. Geef de repo (bv. `admin/image-viewing-website` of "
|
||||||
"`http://localhost:3080/admin/image-viewing-website.git`). "
|
"`http://10.25.138.40:30085/admin/image-viewing-website.git`). "
|
||||||
"Of zeg: **'zoek repo'** als ik zelf moet zoeken.")
|
"Of zeg: **'zoek repo'** als ik zelf moet zoeken.")
|
||||||
return _with_preview(base, st)
|
return _with_preview(base, st)
|
||||||
|
|
||||||
|
|||||||
328
app.py
328
app.py
@ -294,6 +294,87 @@ def detect_toolcalls_any(text: str) -> list[dict]:
|
|||||||
}]
|
}]
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _coerce_text_toolcalls_to_openai(data: dict) -> dict:
    """Convert text-encoded tool calls into OpenAI-native ``tool_calls``.

    Some upstream LLM backends return tool calls as plain text inside
    ``choices[0].message.content`` (for example ``[TOOL_CALLS] [...]``)
    instead of the structured ``choices[0].message.tool_calls`` field.
    This rewrites such a response in place so that the message carries
    ``tool_calls``, ``content`` is ``None`` and ``finish_reason`` is
    ``"tool_calls"``.

    Responses that already contain native ``tool_calls``, that have no
    textual content, or whose content shows no tool-call signal are
    returned untouched.

    Args:
        data: Parsed JSON body of a chat-completion response. Any other
            type is returned as-is.

    Returns:
        The same ``data`` object, mutated in place when tool calls were
        recovered from the text and unchanged otherwise.
    """
    try:
        if not isinstance(data, dict):
            return data
        choices = data.get("choices") or []
        if not choices or not isinstance(choices, list):
            return data
        ch0 = choices[0] or {}
        if not isinstance(ch0, dict):
            return data
        msg = ch0.get("message") or {}
        if not isinstance(msg, dict):
            return data

        # Native tool_calls already present -> nothing to do.
        if msg.get("tool_calls"):
            return data

        content = msg.get("content")
        if not isinstance(content, str):
            return data
        s = content.strip()
        if not s:
            return data

        # Only attempt a conversion when the text shows a clear signal.
        has_signal = (
            "[TOOL_CALLS]" in s
            or s.startswith("[")
            or "call_tool" in s
            or "tool_calls" in s
        )
        if not has_signal:
            return data

        calls = detect_toolcalls_any(s) or []
        if not calls:
            # vLLM / [TOOL_CALLS] style: usually a JSON array after the tag.
            calls = _parse_toolcall_array(s)

        if calls:
            msg["role"] = msg.get("role") or "assistant"
            msg["content"] = None
            msg["tool_calls"] = calls
            ch0["message"] = msg
            ch0["finish_reason"] = "tool_calls"
            data["choices"][0] = ch0
        return data
    except Exception:
        # Deliberate best-effort: a response we cannot interpret is passed
        # through unchanged rather than failing the whole request.
        return data


def _parse_toolcall_array(text: str) -> list:
    """Parse a ``[TOOL_CALLS]``-style JSON array into OpenAI tool_call dicts.

    Only the text after the first ``[TOOL_CALLS]`` tag (matched
    case-insensitively) is inspected, wherever the tag sits. Without this,
    a tag preceded by other text would make the bracket search below start
    at the tag's own ``[`` and the JSON parse would always fail. When no
    tag is present the whole text is inspected.

    Returns:
        A list of ``{"id", "type", "function"}`` dicts; empty when no
        valid JSON array of named calls is found.
    """
    payload = re.split(r"\[TOOL_CALLS\]", text, maxsplit=1, flags=re.I)[-1]
    payload = html.unescape(payload)

    # Greedy match: from the first "[" to the last "]" of the payload.
    found = re.search(r"\[[\s\S]*\]", payload)
    if not found:
        return []
    try:
        arr = json.loads(found.group(0))
    except ValueError:
        return []
    if not isinstance(arr, list):
        return []

    calls = []
    for item in arr:
        if not isinstance(item, dict):
            continue
        name = item.get("name")
        args = item.get("arguments", {})
        fn = item.get("function")
        # Accept the nested {"function": {"name", "arguments"}} shape too.
        if not name and isinstance(fn, dict):
            name = fn.get("name")
            args = fn.get("arguments", args)
        if isinstance(args, str):
            try:
                args = json.loads(args)
            except ValueError:
                # Not JSON: keep the raw string under a generic key.
                args = {"input": args}
        if name:
            calls.append({
                "id": f"call_{uuid.uuid4().hex[:8]}",
                "type": "function",
                "function": {
                    "name": name,
                    "arguments": json.dumps(args, ensure_ascii=False),
                },
            })
    return calls
|
||||||
|
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# App & logging
|
# App & logging
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
@ -384,7 +465,7 @@ async def log_requests(request: Request, call_next):
|
|||||||
# Config
|
# Config
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
MISTRAL_MODE = os.getenv("MISTRAL_MODE", "v1").lower()
|
MISTRAL_MODE = os.getenv("MISTRAL_MODE", "v1").lower()
|
||||||
LLM_URL = os.getenv("LLM_URL", "http://localhost:8000/v1/chat/completions").strip()
|
LLM_URL = os.getenv("LLM_URL", "http://192.168.100.1:8000/v1/chat/completions").strip()
|
||||||
RAW_URL = os.getenv("MISTRAL_URL_RAW", "http://host.docker.internal:8000/completion").strip()
|
RAW_URL = os.getenv("MISTRAL_URL_RAW", "http://host.docker.internal:8000/completion").strip()
|
||||||
LLM_CONNECT_TIMEOUT = float(os.getenv("LLM_CONNECT_TIMEOUT", "10"))
|
LLM_CONNECT_TIMEOUT = float(os.getenv("LLM_CONNECT_TIMEOUT", "10"))
|
||||||
LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200"))
|
LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200"))
|
||||||
@ -392,7 +473,7 @@ LLM_READ_TIMEOUT = float(os.getenv("LLM_READ_TIMEOUT", "1200"))
|
|||||||
_UPSTREAM_URLS = [u.strip() for u in os.getenv("LLM_UPSTREAMS","").split(",") if u.strip()]
|
_UPSTREAM_URLS = [u.strip() for u in os.getenv("LLM_UPSTREAMS","").split(",") if u.strip()]
|
||||||
|
|
||||||
# ==== Meilisearch (optioneel) ====
|
# ==== Meilisearch (optioneel) ====
|
||||||
MEILI_URL = os.getenv("MEILI_URL", "http://localhost:7700").rstrip("/")
|
MEILI_URL = os.getenv("MEILI_URL", "http://192.168.100.1:7700").rstrip("/")
|
||||||
MEILI_API_KEY = os.getenv("MEILI_API_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ")
|
MEILI_API_KEY = os.getenv("MEILI_API_KEY", "0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ")
|
||||||
MEILI_INDEX = os.getenv("MEILI_INDEX", "code_chunks")
|
MEILI_INDEX = os.getenv("MEILI_INDEX", "code_chunks")
|
||||||
MEILI_ENABLED = bool(MEILI_URL)
|
MEILI_ENABLED = bool(MEILI_URL)
|
||||||
@ -485,7 +566,7 @@ if CELERY_ENABLED:
|
|||||||
celery_app = None
|
celery_app = None
|
||||||
|
|
||||||
# Git / repos
|
# Git / repos
|
||||||
GITEA_URL = os.environ.get("GITEA_URL", "http://localhost:3080").rstrip("/")
|
GITEA_URL = os.environ.get("GITEA_URL", "http://10.25.138.40:30085").rstrip("/")
|
||||||
REPO_PATH = os.environ.get("REPO_PATH", "/tmp/repos")
|
REPO_PATH = os.environ.get("REPO_PATH", "/tmp/repos")
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
@ -868,7 +949,7 @@ async def llm_call_openai_compat(
|
|||||||
*,
|
*,
|
||||||
model: Optional[str] = None,
|
model: Optional[str] = None,
|
||||||
stream: bool = False,
|
stream: bool = False,
|
||||||
temperature: float = 0.2,
|
temperature: float = 0.02,
|
||||||
top_p: float = 0.9,
|
top_p: float = 0.9,
|
||||||
max_tokens: int = 42000,
|
max_tokens: int = 42000,
|
||||||
extra: Optional[dict] = None,
|
extra: Optional[dict] = None,
|
||||||
@ -1100,7 +1181,7 @@ async def _svg_from_prompt(prompt: str, w: int, h: int, background: str="white")
|
|||||||
f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.")
|
f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.")
|
||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":sys},{"role":"user","content":user}],
|
[{"role":"system","content":sys},{"role":"user","content":user}],
|
||||||
stream=False, temperature=0.35, top_p=0.9, max_tokens=2048
|
stream=False, temperature=0.035, top_p=0.9, max_tokens=2048
|
||||||
)
|
)
|
||||||
svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
|
svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
|
||||||
return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background)
|
return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background)
|
||||||
@ -1668,7 +1749,7 @@ async def present_make(
|
|||||||
f"Max. {max_slides} dia's, 3–6 bullets per dia.")
|
f"Max. {max_slides} dia's, 3–6 bullets per dia.")
|
||||||
plan = await llm_call_openai_compat(
|
plan = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":sys},{"role":"user","content":user}],
|
[{"role":"system","content":sys},{"role":"user","content":user}],
|
||||||
stream=False, temperature=0.3, top_p=0.9, max_tokens=13021
|
stream=False, temperature=0.03, top_p=0.9, max_tokens=13021
|
||||||
)
|
)
|
||||||
raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}")
|
raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}")
|
||||||
try:
|
try:
|
||||||
@ -1712,7 +1793,7 @@ async def vision_ask(
|
|||||||
file: UploadFile = File(...),
|
file: UploadFile = File(...),
|
||||||
prompt: str = Form("Beschrijf kort wat je ziet."),
|
prompt: str = Form("Beschrijf kort wat je ziet."),
|
||||||
stream: bool = Form(False),
|
stream: bool = Form(False),
|
||||||
temperature: float = Form(0.2),
|
temperature: float = Form(0.02),
|
||||||
top_p: float = Form(0.9),
|
top_p: float = Form(0.9),
|
||||||
max_tokens: int = Form(1024),
|
max_tokens: int = Form(1024),
|
||||||
):
|
):
|
||||||
@ -1743,7 +1824,7 @@ async def vision_and_text(
|
|||||||
stream: bool = Form(False),
|
stream: bool = Form(False),
|
||||||
max_images: int = Form(6),
|
max_images: int = Form(6),
|
||||||
max_chars: int = Form(25000),
|
max_chars: int = Form(25000),
|
||||||
temperature: float = Form(0.2),
|
temperature: float = Form(0.02),
|
||||||
top_p: float = Form(0.9),
|
top_p: float = Form(0.9),
|
||||||
max_tokens: int = Form(2048),
|
max_tokens: int = Form(2048),
|
||||||
):
|
):
|
||||||
@ -1801,7 +1882,7 @@ async def vision_health():
|
|||||||
# -------- Tool registry (OpenAI-style) --------
|
# -------- Tool registry (OpenAI-style) --------
|
||||||
LLM_FUNCTION_CALLING_MODE = os.getenv("LLM_FUNCTION_CALLING_MODE", "auto").lower() # "native" | "shim" | "auto"
|
LLM_FUNCTION_CALLING_MODE = os.getenv("LLM_FUNCTION_CALLING_MODE", "auto").lower() # "native" | "shim" | "auto"
|
||||||
|
|
||||||
OWUI_BASE_URL='http://localhost:3000'
|
OWUI_BASE_URL='http://192.168.100.1:8089'
|
||||||
OWUI_API_TOKEN='sk-f1b7991b054442b5ae388de905019726'
|
OWUI_API_TOKEN='sk-f1b7991b054442b5ae388de905019726'
|
||||||
# Aliassen zodat oudere codepaths blijven werken
|
# Aliassen zodat oudere codepaths blijven werken
|
||||||
OWUI_BASE = OWUI_BASE_URL
|
OWUI_BASE = OWUI_BASE_URL
|
||||||
@ -1976,6 +2057,17 @@ async def t_run_shell(args: dict) -> dict:
|
|||||||
|
|
||||||
async def _execute_tool(name: str, args: dict) -> dict:
|
async def _execute_tool(name: str, args: dict) -> dict:
|
||||||
logger.info("toolcall: "+str(name)+" ("+str(args)+")")
|
logger.info("toolcall: "+str(name)+" ("+str(args)+")")
|
||||||
|
required=[]
|
||||||
|
if name in TOOLS_REGISTRY:
|
||||||
|
required=TOOLS_REGISTRY[name]["parameters"]["required"]
|
||||||
|
else:
|
||||||
|
return {"error": f"Unknown tool '{name}'."}
|
||||||
|
if not all(k in args and args[k] for k in required):
|
||||||
|
return {"error": f"Missing required arguments for tool '{name}'. Required: {required}"}
|
||||||
|
for k in args:
|
||||||
|
if k in required:
|
||||||
|
if args[k] in ['',None]:
|
||||||
|
return {"error": f"Missing required arguments for tool '{name}'. Required: {required}"}
|
||||||
if name == "repo_grep":
|
if name == "repo_grep":
|
||||||
repo_url = args.get("repo_url","")
|
repo_url = args.get("repo_url","")
|
||||||
branch = args.get("branch","main")
|
branch = args.get("branch","main")
|
||||||
@ -2050,6 +2142,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
})
|
})
|
||||||
return out
|
return out
|
||||||
if name == "rag_query":
|
if name == "rag_query":
|
||||||
|
try:
|
||||||
out= await run_in_threadpool(_rag_index_repo_sync, **{
|
out= await run_in_threadpool(_rag_index_repo_sync, **{
|
||||||
"repo_url": args.get("repo",""),
|
"repo_url": args.get("repo",""),
|
||||||
"branch": "main",
|
"branch": "main",
|
||||||
@ -2061,6 +2154,9 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
"collection_name": "code_docs",
|
"collection_name": "code_docs",
|
||||||
"force": False,
|
"force": False,
|
||||||
})
|
})
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"Error for functioncall '{name}', while doing repo_index. errortext: {str(e)}"}
|
||||||
|
try:
|
||||||
out = await rag_query_api(
|
out = await rag_query_api(
|
||||||
query=args.get("query",""),
|
query=args.get("query",""),
|
||||||
n_results=int(args.get("n_results",5)),
|
n_results=int(args.get("n_results",5)),
|
||||||
@ -2070,6 +2166,9 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
profile=args.get("profile")
|
profile=args.get("profile")
|
||||||
)
|
)
|
||||||
return out
|
return out
|
||||||
|
except Exception as e:
|
||||||
|
return {"error": f"Error for functioncall '{name}', while doing repo_query. errortext: {str(e)}"}
|
||||||
|
|
||||||
|
|
||||||
# Console tools
|
# Console tools
|
||||||
if name == "run_shell":
|
if name == "run_shell":
|
||||||
@ -2080,7 +2179,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
# Repo
|
# Repo
|
||||||
if name == "repo_qa":
|
if name == "repo_qa":
|
||||||
# High-level QA over een specifieke repo.
|
# High-level QA over een specifieke repo.
|
||||||
out=json.dumps(await repo_qa_answer(repo_hint=args.get("repo"),question=args.get("question"),branch=args.get("branch","main"),n_ctx=10), ensure_ascii=False)
|
out=json.dumps(await repo_qa_answer(repo_hint=args.get("repo").replace('"','').replace("'",""),question=args.get("question"),branch=args.get("branch","main"),n_ctx=10), ensure_ascii=False)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
# Web tools
|
# Web tools
|
||||||
@ -2203,7 +2302,7 @@ TOOLS_REGISTRY = {
|
|||||||
"repo_url":{"type":"string"},
|
"repo_url":{"type":"string"},
|
||||||
"branch":{"type":"string","default":"main"},
|
"branch":{"type":"string","default":"main"},
|
||||||
"query":{"type":"string"},
|
"query":{"type":"string"},
|
||||||
"max_hits":{"type":"integer","default":200}
|
"max_hits":{"type":"integer","default":10}
|
||||||
},
|
},
|
||||||
"required":["repo_url","query"]
|
"required":["repo_url","query"]
|
||||||
}
|
}
|
||||||
@ -2250,7 +2349,7 @@ TOOLS_REGISTRY = {
|
|||||||
"path_contains":{"type":["string","null"]},
|
"path_contains":{"type":["string","null"]},
|
||||||
"profile":{"type":["string","null"]}
|
"profile":{"type":["string","null"]}
|
||||||
},
|
},
|
||||||
"required":["query"]
|
"required":["query","repo"]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"web_search_xng": {
|
"web_search_xng": {
|
||||||
@ -2286,7 +2385,7 @@ TOOLS_REGISTRY = {
|
|||||||
"repo":{"type":"string"},
|
"repo":{"type":"string"},
|
||||||
"question":{"type":"string"},
|
"question":{"type":"string"},
|
||||||
"branch":{"type":"string"},
|
"branch":{"type":"string"},
|
||||||
},"required":["repo_hint","question"]}
|
},"required":["repo","question"]}
|
||||||
},
|
},
|
||||||
"summarize_text": {
|
"summarize_text": {
|
||||||
"description": "Vat tekst samen in bullets met inleiding en actiepunten.",
|
"description": "Vat tekst samen in bullets met inleiding en actiepunten.",
|
||||||
@ -2490,7 +2589,7 @@ async def llm_call_autocont(
|
|||||||
*,
|
*,
|
||||||
model: Optional[str] = None,
|
model: Optional[str] = None,
|
||||||
stream: bool = False,
|
stream: bool = False,
|
||||||
temperature: float = 0.2,
|
temperature: float = 0.02,
|
||||||
top_p: float = 0.9,
|
top_p: float = 0.9,
|
||||||
max_tokens: int = 1024,
|
max_tokens: int = 1024,
|
||||||
extra: Optional[dict] = None,
|
extra: Optional[dict] = None,
|
||||||
@ -2581,7 +2680,8 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
stream = bool(body.get("stream", False))
|
stream = bool(body.get("stream", False))
|
||||||
raw_messages = body.get("messages") or []
|
raw_messages = body.get("messages") or []
|
||||||
# normaliseer tool-berichten naar plain tekst voor het LLM
|
# normaliseer tool-berichten naar plain tekst voor het LLM
|
||||||
if False:
|
NORMALIZE_TOOL_MESSAGES = os.getenv("NORMALIZE_TOOL_MESSAGES", "0").lower() not in ("0","false","no")
|
||||||
|
if NORMALIZE_TOOL_MESSAGES:
|
||||||
norm_messages = []
|
norm_messages = []
|
||||||
for m in raw_messages:
|
for m in raw_messages:
|
||||||
if m.get("role") == "tool":
|
if m.get("role") == "tool":
|
||||||
@ -2616,7 +2716,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
logger.info("🧰 tools_count=%s, tool_choice=%s", len(tools), tool_choice_req)
|
logger.info("🧰 tools_count=%s, tool_choice=%s", len(tools), tool_choice_req)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
if not stream:
|
if RUN_BRIDGE and not stream:
|
||||||
|
|
||||||
# OWUI stuurt vaak "required" als: "er MOET een tool worden gebruikt".
|
# OWUI stuurt vaak "required" als: "er MOET een tool worden gebruikt".
|
||||||
# Als er precies 1 tool is meegegeven, normaliseren we dat naar "force deze tool".
|
# Als er precies 1 tool is meegegeven, normaliseren we dat naar "force deze tool".
|
||||||
@ -2648,7 +2748,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
client = app.state.HTTPX
|
client = app.state.HTTPX
|
||||||
r = await client.post(LLM_URL, json=passthrough)
|
r = await client.post(LLM_URL, json=passthrough)
|
||||||
try:
|
try:
|
||||||
return JSONResponse(r.json(), status_code=r.status_code)
|
data = r.json()
|
||||||
|
data = _coerce_text_toolcalls_to_openai(data)
|
||||||
|
return JSONResponse(data, status_code=r.status_code)
|
||||||
except Exception:
|
except Exception:
|
||||||
return PlainTextResponse(r.text, status_code=r.status_code)
|
return PlainTextResponse(r.text, status_code=r.status_code)
|
||||||
|
|
||||||
@ -2700,7 +2802,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
client = app.state.HTTPX
|
client = app.state.HTTPX
|
||||||
r = await client.post(LLM_URL, json=passthrough)
|
r = await client.post(LLM_URL, json=passthrough)
|
||||||
try:
|
try:
|
||||||
return JSONResponse(r.json(), status_code=r.status_code)
|
data = r.json()
|
||||||
|
data = _coerce_text_toolcalls_to_openai(data)
|
||||||
|
return JSONResponse(data, status_code=r.status_code)
|
||||||
except Exception:
|
except Exception:
|
||||||
return PlainTextResponse(r.text, status_code=r.status_code)
|
return PlainTextResponse(r.text, status_code=r.status_code)
|
||||||
|
|
||||||
@ -2849,6 +2953,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
if LLM_FUNCTION_CALLING_MODE in ("native","auto") and stream:
|
if LLM_FUNCTION_CALLING_MODE in ("native","auto") and stream:
|
||||||
passthrough = dict(body); passthrough["messages"]=messages
|
passthrough = dict(body); passthrough["messages"]=messages
|
||||||
if images_b64: passthrough["images"]=images_b64
|
if images_b64: passthrough["images"]=images_b64
|
||||||
|
STREAM_TOOLCALL_COERCE = os.getenv("STREAM_TOOLCALL_COERCE","1").lower() not in ("0","false","no")
|
||||||
async def _aiter():
|
async def _aiter():
|
||||||
import asyncio, contextlib
|
import asyncio, contextlib
|
||||||
client = app.state.HTTPX
|
client = app.state.HTTPX
|
||||||
@ -2867,6 +2972,9 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
await q.put(b"__EOF__")
|
await q.put(b"__EOF__")
|
||||||
reader_task = asyncio.create_task(_reader())
|
reader_task = asyncio.create_task(_reader())
|
||||||
try:
|
try:
|
||||||
|
buf = ""
|
||||||
|
acc = ""
|
||||||
|
suppress = False
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
chunk = await asyncio.wait_for(q.get(), timeout=HEARTBEAT)
|
chunk = await asyncio.wait_for(q.get(), timeout=HEARTBEAT)
|
||||||
@ -2875,7 +2983,91 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
continue
|
continue
|
||||||
if chunk == b"__EOF__":
|
if chunk == b"__EOF__":
|
||||||
break
|
break
|
||||||
|
if not STREAM_TOOLCALL_COERCE:
|
||||||
yield chunk
|
yield chunk
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
buf += chunk.decode("utf-8", errors="ignore")
|
||||||
|
except Exception:
|
||||||
|
yield chunk
|
||||||
|
continue
|
||||||
|
# SSE events zijn gescheiden door een lege regel
|
||||||
|
while "\n\n" in buf:
|
||||||
|
event, buf = buf.split("\n\n", 1)
|
||||||
|
if not event:
|
||||||
|
continue
|
||||||
|
if event.startswith(":"):
|
||||||
|
yield (event + "\n\n").encode("utf-8")
|
||||||
|
continue
|
||||||
|
lines = event.splitlines()
|
||||||
|
data_lines = [ln[5:].lstrip() for ln in lines if ln.startswith("data:")]
|
||||||
|
if not data_lines:
|
||||||
|
if not suppress:
|
||||||
|
yield (event + "\n\n").encode("utf-8")
|
||||||
|
continue
|
||||||
|
data_s = "\n".join(data_lines).strip()
|
||||||
|
if data_s == "[DONE]":
|
||||||
|
yield b"data: [DONE]\n\n"
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
obj = json.loads(data_s)
|
||||||
|
except Exception:
|
||||||
|
if not suppress:
|
||||||
|
yield (event + "\n\n").encode("utf-8")
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
ch0 = (obj.get("choices") or [{}])[0] or {}
|
||||||
|
delta = ch0.get("delta") or {}
|
||||||
|
except Exception:
|
||||||
|
delta = {}
|
||||||
|
# Als upstream al echte tool_calls streamt: pass-through
|
||||||
|
if isinstance(delta, dict) and delta.get("tool_calls"):
|
||||||
|
if not suppress:
|
||||||
|
yield ("data: " + json.dumps(obj, ensure_ascii=False) + "\n\n").encode("utf-8")
|
||||||
|
continue
|
||||||
|
content = (delta.get("content") if isinstance(delta, dict) else None)
|
||||||
|
if isinstance(content, str) and content:
|
||||||
|
acc += content
|
||||||
|
if "[TOOL_CALLS" in acc:
|
||||||
|
suppress = True # onderdruk de text-tag stream
|
||||||
|
calls = detect_toolcalls_any(acc) or []
|
||||||
|
if calls:
|
||||||
|
created = int(time.time())
|
||||||
|
chunk_id = obj.get("id") or f"chatcmpl-{uuid.uuid4().hex[:24]}"
|
||||||
|
model_name = obj.get("model") or body.get("model") or "unknown"
|
||||||
|
tc_delta = []
|
||||||
|
for i, tc in enumerate(calls):
|
||||||
|
tcc = dict(tc)
|
||||||
|
tcc["index"] = i
|
||||||
|
tc_delta.append(tcc)
|
||||||
|
first = {
|
||||||
|
"id": chunk_id,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": created,
|
||||||
|
"model": model_name,
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"delta": {"role":"assistant", "tool_calls": tc_delta},
|
||||||
|
"finish_reason": None
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
second = {
|
||||||
|
"id": chunk_id,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": created,
|
||||||
|
"model": model_name,
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"delta": {},
|
||||||
|
"finish_reason": "tool_calls"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
yield ("data: " + json.dumps(first, ensure_ascii=False) + "\n\n").encode("utf-8")
|
||||||
|
yield ("data: " + json.dumps(second, ensure_ascii=False) + "\n\n").encode("utf-8")
|
||||||
|
yield b"data: [DONE]\n\n"
|
||||||
|
return
|
||||||
|
if not suppress:
|
||||||
|
yield ("data: " + json.dumps(obj, ensure_ascii=False) + "\n\n").encode("utf-8")
|
||||||
finally:
|
finally:
|
||||||
reader_task.cancel()
|
reader_task.cancel()
|
||||||
with contextlib.suppress(Exception):
|
with contextlib.suppress(Exception):
|
||||||
@ -2895,31 +3087,46 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
if images_b64: passthrough["images"]=images_b64
|
if images_b64: passthrough["images"]=images_b64
|
||||||
r = await client.post(LLM_URL, json=passthrough)
|
r = await client.post(LLM_URL, json=passthrough)
|
||||||
try:
|
try:
|
||||||
return JSONResponse(r.json(), status_code=r.status_code)
|
data = r.json()
|
||||||
|
data = _coerce_text_toolcalls_to_openai(data)
|
||||||
|
return JSONResponse(data, status_code=r.status_code)
|
||||||
except Exception:
|
except Exception:
|
||||||
return PlainTextResponse(r.text, status_code=r.status_code)
|
return PlainTextResponse(r.text, status_code=r.status_code)
|
||||||
|
|
||||||
# (A) 1e call: vraag de LLM om tool_calls (geen stream)
|
# Relay-modus: iteratief tools uitvoeren totdat de LLM stopt met tool_calls
|
||||||
first_req = dict(body)
|
max_rounds = int(os.getenv("LLM_TOOL_MAX_ROUNDS", "5"))
|
||||||
first_req["messages"] = messages
|
follow_messages = messages
|
||||||
first_req["stream"] = False
|
last_status = None
|
||||||
if images_b64: first_req["images"] = images_b64
|
for _round in range(max_rounds):
|
||||||
r1 = await client.post(LLM_URL, json=first_req)
|
req_i = dict(body)
|
||||||
|
req_i["messages"] = follow_messages
|
||||||
|
req_i["stream"] = False
|
||||||
|
if images_b64: req_i["images"] = images_b64
|
||||||
|
r_i = await client.post(LLM_URL, json=req_i)
|
||||||
|
last_status = r_i.status_code
|
||||||
try:
|
try:
|
||||||
data1 = r1.json()
|
data_i = r_i.json()
|
||||||
except Exception:
|
except Exception:
|
||||||
return PlainTextResponse(r1.text, status_code=r1.status_code)
|
return PlainTextResponse(r_i.text, status_code=r_i.status_code)
|
||||||
msg1 = ((data1.get("choices") or [{}])[0].get("message") or {})
|
msg_i = ((data_i.get("choices") or [{}])[0].get("message") or {})
|
||||||
tool_calls = msg1.get("tool_calls") or []
|
tool_calls = msg_i.get("tool_calls") or []
|
||||||
|
# Fallback: sommige backends gooien toolcalls als tekst (bv. [TOOL_CALLS])
|
||||||
|
if not tool_calls:
|
||||||
|
txt = (msg_i.get("content") or "")
|
||||||
|
tool_calls = detect_toolcalls_any(txt) or []
|
||||||
# Geen tool-calls? Geef direct door.
|
# Geen tool-calls? Geef direct door.
|
||||||
if not tool_calls:
|
if not tool_calls:
|
||||||
return JSONResponse(data1, status_code=r1.status_code)
|
data_i = _coerce_text_toolcalls_to_openai(data_i)
|
||||||
|
return JSONResponse(data_i, status_code=r_i.status_code)
|
||||||
|
|
||||||
# (B) voer tool_calls lokaal uit
|
# Tools uitvoeren
|
||||||
tool_msgs = []
|
tool_msgs = []
|
||||||
for tc in tool_calls:
|
for tc in tool_calls:
|
||||||
|
# Normaliseer tc structuur
|
||||||
|
tc_id = (tc or {}).get("id") or f"call_{uuid.uuid4().hex[:8]}"
|
||||||
fn = ((tc or {}).get("function") or {})
|
fn = ((tc or {}).get("function") or {})
|
||||||
tname = fn.get("name")
|
tname = fn.get("name")
|
||||||
|
logger.info(f"Running tool: '{tname}'")
|
||||||
raw_args = fn.get("arguments") or "{}"
|
raw_args = fn.get("arguments") or "{}"
|
||||||
try:
|
try:
|
||||||
args = json.loads(raw_args) if isinstance(raw_args, str) else (raw_args or {})
|
args = json.loads(raw_args) if isinstance(raw_args, str) else (raw_args or {})
|
||||||
@ -2934,26 +3141,22 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
out = {"error": str(e)}
|
out = {"error": str(e)}
|
||||||
tool_msgs.append({
|
tool_msgs.append({
|
||||||
"role": "tool",
|
"role": "tool",
|
||||||
"tool_call_id": tc.get("id"),
|
"tool_call_id": tc_id,
|
||||||
"name": tname or "unknown",
|
"name": tname or "unknown",
|
||||||
"content": json.dumps(out, ensure_ascii=False)
|
"content": json.dumps(out, ensure_ascii=False)
|
||||||
})
|
})
|
||||||
|
# Zorg dat assistant tool_calls een id heeft
|
||||||
|
if isinstance(tc, dict) and not tc.get("id"):
|
||||||
|
tc["id"] = tc_id
|
||||||
|
|
||||||
# (C) 2e call: geef tool outputs terug aan LLM voor eindantwoord
|
follow_messages = follow_messages + [
|
||||||
follow_messages = messages + [
|
|
||||||
{"role": "assistant", "tool_calls": tool_calls},
|
{"role": "assistant", "tool_calls": tool_calls},
|
||||||
*tool_msgs
|
*tool_msgs
|
||||||
]
|
]
|
||||||
second_req = dict(body)
|
|
||||||
second_req["messages"] = follow_messages
|
# Te veel tool-rondes → stop om loops te voorkomen
|
||||||
second_req["stream"] = False
|
safe_msg = f"Te veel tool-rondes ({max_rounds}). Stop om loop te voorkomen."
|
||||||
# images opnieuw meesturen is niet nodig, maar kan geen kwaad:
|
return JSONResponse(_openai_chat_response(model, safe_msg, follow_messages), status_code=(last_status or 200))
|
||||||
if images_b64: second_req["images"] = images_b64
|
|
||||||
r2 = await client.post(LLM_URL, json=second_req)
|
|
||||||
try:
|
|
||||||
return JSONResponse(r2.json(), status_code=r2.status_code)
|
|
||||||
except Exception:
|
|
||||||
return PlainTextResponse(r2.text, status_code=r2.status_code)
|
|
||||||
|
|
||||||
# shim (non-stream)
|
# shim (non-stream)
|
||||||
if LLM_FUNCTION_CALLING_MODE == "shim" and not stream:
|
if LLM_FUNCTION_CALLING_MODE == "shim" and not stream:
|
||||||
@ -2967,7 +3170,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
"Otherwise reply with ONLY: {\"final_answer\":\"...\"}\n\nTools:\n" + "\n".join(tool_lines))
|
"Otherwise reply with ONLY: {\"final_answer\":\"...\"}\n\nTools:\n" + "\n".join(tool_lines))
|
||||||
decide = await llm_call_openai_compat(
|
decide = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":sys}] + messages,
|
[{"role":"system","content":sys}] + messages,
|
||||||
stream=False, temperature=float(body.get("temperature",0.2)),
|
stream=False, temperature=float(body.get("temperature",0.02)),
|
||||||
top_p=float(body.get("top_p",0.9)), max_tokens=min(512, int(body.get("max_tokens",1024))),
|
top_p=float(body.get("top_p",0.9)), max_tokens=min(512, int(body.get("max_tokens",1024))),
|
||||||
extra=extra_payload if extra_payload else None
|
extra=extra_payload if extra_payload else None
|
||||||
)
|
)
|
||||||
@ -3006,7 +3209,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
]
|
]
|
||||||
final = await llm_call_openai_compat(
|
final = await llm_call_openai_compat(
|
||||||
follow, stream=False,
|
follow, stream=False,
|
||||||
temperature=float(body.get("temperature",0.2)),
|
temperature=float(body.get("temperature",0.02)),
|
||||||
top_p=float(body.get("top_p",0.9)),
|
top_p=float(body.get("top_p",0.9)),
|
||||||
max_tokens=int(body.get("max_tokens",1024)),
|
max_tokens=int(body.get("max_tokens",1024)),
|
||||||
extra=extra_payload if extra_payload else None
|
extra=extra_payload if extra_payload else None
|
||||||
@ -3023,7 +3226,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
LLM_WINDOWING_ENABLE = os.getenv("LLM_WINDOWING_ENABLE", "1").lower() not in ("0","false","no")
|
LLM_WINDOWING_ENABLE = os.getenv("LLM_WINDOWING_ENABLE", "1").lower() not in ("0","false","no")
|
||||||
MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "13021"))
|
MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "13021"))
|
||||||
RESP_RESERVE = int(os.getenv("LLM_RESPONSE_RESERVE", "1024"))
|
RESP_RESERVE = int(os.getenv("LLM_RESPONSE_RESERVE", "1024"))
|
||||||
temperature = float(body.get("temperature", 0.2))
|
temperature = float(body.get("temperature", 0.02))
|
||||||
top_p = float(body.get("top_p", 0.9))
|
top_p = float(body.get("top_p", 0.9))
|
||||||
# respecteer env-override voor default
|
# respecteer env-override voor default
|
||||||
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "1024"))
|
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "1024"))
|
||||||
@ -3051,7 +3254,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
{"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."},
|
{"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."},
|
||||||
{"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."}
|
{"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."}
|
||||||
]
|
]
|
||||||
resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=300)
|
resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=300)
|
||||||
return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "")
|
return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "")
|
||||||
trimmed_stream_msgs = await win.build_within_budget(system_prompt=None, summarizer=_summarizer)
|
trimmed_stream_msgs = await win.build_within_budget(system_prompt=None, summarizer=_summarizer)
|
||||||
new_summary = getattr(win, "running_summary", running_summary)
|
new_summary = getattr(win, "running_summary", running_summary)
|
||||||
@ -3116,7 +3319,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
else:
|
else:
|
||||||
# --- ÉCHTE streaming (geen tools): direct passthrough met heartbeats ---
|
# --- ÉCHTE streaming (geen tools): direct passthrough met heartbeats ---
|
||||||
if stream:
|
if stream:
|
||||||
temperature = float(body.get("temperature", 0.2))
|
temperature = float(body.get("temperature", 0.02))
|
||||||
top_p = float(body.get("top_p", 0.9))
|
top_p = float(body.get("top_p", 0.9))
|
||||||
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "13021"))
|
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "13021"))
|
||||||
max_tokens = int(body.get("max_tokens", _default_max))
|
max_tokens = int(body.get("max_tokens", _default_max))
|
||||||
@ -3138,7 +3341,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "42000"))
|
MAX_CTX_TOKENS = int(os.getenv("LLM_CONTEXT_TOKENS", "42000"))
|
||||||
RESP_RESERVE = int(os.getenv("LLM_RESPONSE_RESERVE", "1024"))
|
RESP_RESERVE = int(os.getenv("LLM_RESPONSE_RESERVE", "1024"))
|
||||||
MAX_AUTOCONT = int(os.getenv("LLM_AUTO_CONTINUES", "2"))
|
MAX_AUTOCONT = int(os.getenv("LLM_AUTO_CONTINUES", "2"))
|
||||||
temperature = float(body.get("temperature", 0.2))
|
temperature = float(body.get("temperature", 0.02))
|
||||||
top_p = float(body.get("top_p", 0.9))
|
top_p = float(body.get("top_p", 0.9))
|
||||||
# Laat env de default bepalen, zodat OWUI niet hard op 1024 blijft hangen
|
# Laat env de default bepalen, zodat OWUI niet hard op 1024 blijft hangen
|
||||||
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "42000"))
|
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "42000"))
|
||||||
@ -3165,7 +3368,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
{"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."},
|
{"role":"system","content":"Je bent een bondige notulist. Vat samen in max 10 bullets (feiten/besluiten/acties)."},
|
||||||
{"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."}
|
{"role":"user","content": f"Vorige samenvatting:\n{old}\n\nNieuwe geschiedenis:\n{chunk_text}\n\nGeef geüpdatete samenvatting."}
|
||||||
]
|
]
|
||||||
resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=300)
|
resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=300)
|
||||||
return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "")
|
return (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content", old or "")
|
||||||
trimmed = await win.build_within_budget(system_prompt=None, summarizer=_summarizer)
|
trimmed = await win.build_within_budget(system_prompt=None, summarizer=_summarizer)
|
||||||
new_summary = getattr(win, "running_summary", running_summary)
|
new_summary = getattr(win, "running_summary", running_summary)
|
||||||
@ -3309,7 +3512,7 @@ async def _summarize_files_llm(items: list[tuple[str, str]]) -> dict[str, str]:
|
|||||||
{"role":"user","content": f"Pad: {path}\n\nInhoud (ingekort):\n{snippet}\n\nAntwoord: "}
|
{"role":"user","content": f"Pad: {path}\n\nInhoud (ingekort):\n{snippet}\n\nAntwoord: "}
|
||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.1, top_p=1.0, max_tokens=64)
|
resp = await llm_call_openai_compat(prompt, stream=False, temperature=0.01, top_p=1.0, max_tokens=64)
|
||||||
summ = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content","").strip()
|
summ = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content","").strip()
|
||||||
except Exception:
|
except Exception:
|
||||||
summ = ""
|
summ = ""
|
||||||
@ -3807,14 +4010,19 @@ async def rag_query_api(
|
|||||||
collection_name_eff = _collection_effective(collection_name)
|
collection_name_eff = _collection_effective(collection_name)
|
||||||
col = _get_collection(collection_name_eff)
|
col = _get_collection(collection_name_eff)
|
||||||
q_emb = _EMBEDDER.embed_query(query)
|
q_emb = _EMBEDDER.embed_query(query)
|
||||||
where = {}
|
# Chroma: $and/$or moet >=2 where-expressies bevatten.
|
||||||
|
conds = []
|
||||||
if repo:
|
if repo:
|
||||||
# Accepteer zowel 'repo' (basename) als 'repo_full' (owner/repo)
|
conds.append({"repo_full": {"$eq": repo}})
|
||||||
base = repo.rsplit("/", 1)[-1]
|
if branch:
|
||||||
where = {"$and": [
|
conds.append({"branch": {"$eq": branch}})
|
||||||
{"repo_full": {"$eq": repo}}
|
if profile:
|
||||||
]}
|
conds.append({"profile": {"$eq": profile}})
|
||||||
if profile: where["profile"] = {"$eq": profile}
|
where = None
|
||||||
|
if len(conds) == 1:
|
||||||
|
where = conds[0]
|
||||||
|
elif len(conds) >= 2:
|
||||||
|
where = {"$and": conds}
|
||||||
|
|
||||||
# ---- symbol hit set (repo-scoped) ----
|
# ---- symbol hit set (repo-scoped) ----
|
||||||
sym_hit_keys: set[str] = set()
|
sym_hit_keys: set[str] = set()
|
||||||
@ -4113,7 +4321,7 @@ async def rag_query_api(
|
|||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":"You are precise and return only valid JSON."},
|
[{"role":"system","content":"You are precise and return only valid JSON."},
|
||||||
{"role":"user","content": prompt+"\n\nOnly JSON array."}],
|
{"role":"user","content": prompt+"\n\nOnly JSON array."}],
|
||||||
stream=False, temperature=0.0, top_p=1.0, max_tokens=1024
|
stream=False, temperature=0.01, top_p=1.0, max_tokens=1024
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]"))
|
order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]"))
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
docker run -d --rm --name mistral-api --network host -v /opt/SentenceTransformer:/opt/sentence-transformers -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=1 -e LLM_CONTEXT_TOKENS=16288 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=2 -e LLM_FUNCTION_CALLING_MODE=shim -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 mistral-api
|
docker run -d --rm --name mistral-api --network host -v /opt/SentenceTransformer:/opt/sentence-transformers -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=0 -e LLM_CONTEXT_TOKENS=42000 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=0 -e LLM_FUNCTION_CALLING_MODE=auto -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 -e FORCE_ALL_TOOLS=0 -e AUTO_CONTINUE=0 -e STREAM_PREFER_DIRECT=1 mistral-api
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user