diff --git a/Dockerfile b/Dockerfile index b3194c0..bb7b046 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,11 +2,33 @@ FROM python:3.11-slim WORKDIR /app +# ===== Model caches op vaste paden (blijven in image) ===== +# Hugging Face caches (embeddings) + XDG cache (o.a. whisper) +ENV HF_HOME=/opt/hf \ + HUGGINGFACE_HUB_CACHE=/opt/hf \ + TRANSFORMERS_CACHE=/opt/hf \ + SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \ + XDG_CACHE_HOME=/opt/cache \ + STT_MODEL=small + +# Optioneel build-args om modelkeuzes te pinnen +# of: bge-small / e5-small / gte-base-en +ARG RAG_EMBEDDINGS=gte-multilingual +# tiny | base | small | medium | large-v3, etc. +ARG STT_MODEL_ARG=small +ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS} +ENV STT_MODEL=${STT_MODEL_ARG} + +# maak directories nu al aan (rechten) +RUN mkdir -p /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper && \ + chmod -R a+rX /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper + COPY requirements.txt . -RUN apt-get update && apt-get -y install git curl ffmpeg libcairo2 libpango-1.0-0 libgdk-pixbuf2.0-0 apt-utils +RUN apt-get update && apt-get -y install git curl ffmpeg libcairo2 libpango-1.0-0 libgdk-pixbuf2.0-0 apt-utils pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libswscale-dev libswresample-dev build-essential RUN pip install --upgrade pip RUN pip install --no-cache-dir -r requirements.txt RUN pip install PyPDF2 python-multipart gitpython chromadb httpx meilisearch pandas openpyxl python-pptx faster-whisper==1.0.0 cairosvg sentence-transformers rank-bm25 +#RUN pip cache purge RUN apt-get update && apt-get install -y --no-install-recommends \ wget ca-certificates libstdc++6 libatomic1 \ @@ -19,12 +41,46 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ln -sf /opt/piper/piper /usr/local/bin/piper; \ rm -f /tmp/piper.tgz +# ===== Prefetch modellen tijdens de build ===== +# 1) SentenceTransformers (embeddings) — volgens je mapping in app.py +RUN python
- <<'PY' +import os +from sentence_transformers import SentenceTransformer +mapping = { + "gte-multilingual": ("Alibaba-NLP/gte-multilingual-base"), + "bge-small": ("BAAI/bge-small-en-v1.5"), + "e5-small": ("intfloat/e5-small-v2"), + "gte-base-en": ("thenlper/gte-base"), +} +choice = os.environ.get("RAG_EMBEDDINGS","gte-multilingual").lower() +hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5") +# cache_folder respecteert SENTENCE_TRANSFORMERS_HOME/HF_HOME, maar we forceren expliciet: +SentenceTransformer(hf_id, cache_folder=os.environ.get("SENTENCE_TRANSFORMERS_HOME","/opt/sentence-transformers")) +print("Prefetched SentenceTransformer:", hf_id) +PY + +# 2) faster-whisper (STT) — cache in /opt/cache/whisper +RUN python - <<'PY' +import os +from faster_whisper import WhisperModel +name = os.environ.get("STT_MODEL","small") +cache_root = os.path.join(os.environ.get("XDG_CACHE_HOME","/opt/cache"), "whisper") +os.makedirs(cache_root, exist_ok=True) +# Build-time altijd CPU/INT8 (geen GPU nodig tijdens build) +_ = WhisperModel(name, device="cpu", compute_type="int8", download_root=cache_root) +print("Prefetched faster-whisper:", name, "->", cache_root) +PY + +# (optioneel) piper voice kun je hier ook voorcachen; laat ik nu achterwege omdat voice per omgeving wisselt. + + COPY app.py . COPY queue_helper.py . COPY agent_repo.py . COPY windowing_utils.py . COPY smart_rag.py . +COPY llm_client . 
EXPOSE 8080 diff --git a/agent_repo.py b/agent_repo.py index 5eeb047..14ea4ce 100644 --- a/agent_repo.py +++ b/agent_repo.py @@ -18,7 +18,7 @@ from windowing_utils import approx_token_count from starlette.concurrency import run_in_threadpool import asyncio from collections import defaultdict - +from llm_client import _llm_call # --- Async I/O executors (voorkom event-loop blocking) --- from concurrent.futures import ThreadPoolExecutor @@ -59,7 +59,59 @@ _meili_search_fn = None _GRAPH_CACHE: dict[str, dict[str, set[str]]] = {} _TREE_SUM_CACHE: dict[str, dict[str, str]] = {} +# --------------------------------------------------------- +# Fast-path helpers: expliciete paden + vervangpaar (old->new) +# --------------------------------------------------------- +_Q = r"[\"'“”‘’`]" +_PATH_PATS = [ + r"[\"“”'](resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)[\"”']", + r"(resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)", + r"[\"“”'](app\/[A-Za-z0-9_\/\.-]+\.php)[\"”']", + r"(app\/[A-Za-z0-9_\/\.-]+\.php)", +] +_TRANS_WRAPPERS = [ + r"__\(\s*{q}(.+?){q}\s*\)".format(q=_Q), + r"@lang\(\s*{q}(.+?){q}\s*\)".format(q=_Q), + r"trans\(\s*{q}(.+?){q}\s*\)".format(q=_Q), +] +def _extract_repo_branch_from_text(txt: str) -> Tuple[Optional[str], str]: + repo_url, branch = None, "main" + m = re.search(r"\bRepo\s*:\s*(\S+)", txt, flags=re.I) + if m: repo_url = m.group(1).strip() + mb = re.search(r"\bbranch\s*:\s*([A-Za-z0-9._/-]+)", txt, flags=re.I) + if mb: branch = mb.group(1).strip() + return repo_url, branch + +def _extract_explicit_paths(txt: str) -> List[str]: + out = [] + for pat in _PATH_PATS: + for m in re.finditer(pat, txt): + p = m.group(1) + if p and p not in out: + out.append(p) + return out + +def _extract_replace_pair(txt: str) -> Tuple[Optional[str], Optional[str]]: + # NL/EN varianten + “slimme” quotes + pats = [ + rf"Vervang\s+de\s+tekst\s*{_Q}(.+?){_Q}[^.\n]*?(?:in|naar|verander(?:en)?\s+in)\s*{_Q}(.+?){_Q}", + 
rf"Replace(?:\s+the)?\s+text\s*{_Q}(.+?){_Q}\s*(?:to|with)\s*{_Q}(.+?){_Q}", + ] + for p in pats: + m = re.search(p, txt, flags=re.I|re.S) + if m: + return m.group(1), m.group(2) + mm = re.search(r"(Vervang|Replace)[\s\S]*?"+_Q+r"(.+?)"+_Q+r"[\s\S]*?"+_Q+r"(.+?)"+_Q, txt, flags=re.I) + if mm: + return mm.group(2), mm.group(3) + return None, None + +def _looks_like_unified_diff_request(txt: str) -> bool: + if re.search(r"\bunified\s+diff\b", txt, flags=re.I): return True + if re.search(r"\b(diff|patch)\b", txt, flags=re.I) and _extract_explicit_paths(txt): + return True + return False # zet dit dicht bij de andere module-consts async def _call_get_git_repo(repo_url: str, branch: str):
+ """ + # 1) Combineer user/system content om opdracht te parsen + try: + full_txt = "\n".join([m.get("content","") for m in messages if m.get("role") in ("system","user")]) + except Exception: + full_txt = "" + + # 2) Herken fast-path + try_fast = _looks_like_unified_diff_request(full_txt) + paths_fp = _extract_explicit_paths(full_txt) if try_fast else [] + old_txt, new_txt = _extract_replace_pair(full_txt) if try_fast else (None, None) + + # NB: we gebruiken de injecties die via initialize_agent zijn gezet: + # - get_git_repo_fn (async) + # - read_text_file_fn (sync) + # Deze symbolen worden onderin initialize_agent aan globals() gehangen. + get_git_repo_fn = globals().get("get_git_repo_fn") + read_text_file_fn = globals().get("read_text_file_fn") + + if try_fast and paths_fp and old_txt and new_txt and callable(get_git_repo_fn) and callable(read_text_file_fn): + # 3) repo + branch bepalen + repo_url, branch = _extract_repo_branch_from_text(full_txt) + if not repo_url: + # fallback: probeer repo uit eerdere agent-state (optioneel), anders stop fast-path + repo_url = globals().get("_last_repo_url") + branch = globals().get("_last_branch", "main") + if repo_url: + try: + repo_root = await get_git_repo_fn(repo_url, branch or "main") + root = Path(repo_root) + lang_path = root / "resources" / "lang" / "nl.json" + lang_before = lang_path.read_text(encoding="utf-8", errors="ignore") if lang_path.exists() else "{}" + lang_data = {} + try: + lang_data = json.loads(lang_before or "{}") + except Exception: + lang_data = {} + + diffs_out = [] + lang_changed = False + + def _make_udiff(a: str, b: str, rel: str) -> str: + return "".join(difflib.unified_diff( + a.splitlines(keepends=True), + b.splitlines(keepends=True), + fromfile=f"a/{rel}", tofile=f"b/{rel}", n=3 + )) + + # 4) per bestand: ofwel inline replace, ofwel vertaling bijwerken + for rel in paths_fp: + p = root / rel + if not p.exists(): + continue + before = read_text_file_fn(p) + if not before: + continue + # Als 
de 'oude' tekst voorkomt BINNEN een vertaalwrapper, dan géén blade-edit + found_in_wrapper = False + for pat in _TRANS_WRAPPERS: + for m in re.finditer(pat, before): + inner = m.group(1) + if inner == old_txt: + found_in_wrapper = True + break + if found_in_wrapper: + break + if found_in_wrapper: + # update nl.json: {"oude": "nieuwe"} + if lang_data.get(old_txt) != new_txt: + lang_data[old_txt] = new_txt + lang_changed = True + continue + + # anders: directe, exacte vervanging (conservatief) + after = before.replace(old_txt, new_txt) + if after != before: + diff = _make_udiff(before, after, rel) + if diff.strip(): + diffs_out.append(("blade", rel, diff)) + + # 5) indien vertaling gewijzigd: diff voor nl.json toevoegen + if lang_changed: + new_lang = json.dumps(lang_data, ensure_ascii=False, indent=2, sort_keys=True) + "\n" + diff_lang = _make_udiff(lang_before if isinstance(lang_before, str) else "", new_lang, "resources/lang/nl.json") + if diff_lang.strip(): + diffs_out.append(("lang", "resources/lang/nl.json", diff_lang)) + if diffs_out: + parts = ["### Unified diffs"] + for kind, rel, d in diffs_out: + parts.append(f"**{rel}**") + parts.append("```diff\n" + d + "```") + return "\n\n".join(parts) + else: + return "Dry-run: geen wijzigbare treffers gevonden in opgegeven bestanden (of reeds actueel)." 
+ except Exception as e: + # mislukt → val terug op bestaande discover/agent flow + pass + + # === GEEN fast-path → ga door met de bestaande flow hieronder === + sid = _get_session_id(messages, request) st = _app.state.AGENT_SESSIONS.get(sid) or AgentState() _app.state.AGENT_SESSIONS[sid] = st user_last = next((m["content"] for m in reversed(messages) if m.get("role")=="user"), "").strip() user_last_lower = user_last.lower() logger.info("INFO:agent_repo:[%s] stage=%s", sid, st.stage) - from smart_rag import enrich_intent, expand_queries, hybrid_retrieve + from smart_rag import ( + enrich_intent, + expand_queries, + hybrid_retrieve, + _laravel_pairs_from_route_text, + _laravel_guess_view_paths_from_text, + ) # Als user een .git URL meegeeft: zet state en ga via de state-machine verder user_txt = next((m.get("content","") for m in reversed(messages) if m.get("role")=="user"), "") repo_url = await _detect_repo_url(user_txt) @@ -4287,21 +4448,18 @@ async def handle_repo_agent(messages: List[dict], request) -> str: picked.append(f) # --- VIEW/LANG bias voor UI-label wijzigingen --- - if task_type == "ui_label_change": - # Probeer de 'oude' literal uit de prompt te halen (voor gerichter filteren) - try: - old_lit, _new_lit, _why = deduce_old_new_literals(st.user_goal, "") - except Exception: - old_lit = None + # Pak de eerste quote uit de prompt als "oude" literal + qs = extract_quotes(st.user_goal) or [] + old_lit = qs[0] if qs else None - def _contains_old(rel: str) -> bool: - if not old_lit: - return True - try: - txt = _read_text_file(Path(st.repo_path)/rel) or "" - return old_lit in txt - except Exception: - return False + def _contains_old(rel: str) -> bool: + if not old_lit: + return True # fallback: geen filtering + try: + txt = _read_text_file(Path(st.repo_path) / rel) or "" + return old_lit in txt + except Exception: + return False view_files = [f for f in all_files if f.startswith("resources/views/") and f.endswith(".blade.php")] @@ -4374,25 +4532,15 @@ async 
def handle_repo_agent(messages: List[dict], request) -> str: # routes -> controllers if rel in ("routes/web.php","routes/api.php"): txt = (Path(st.repo_path)/rel).read_text(encoding="utf-8", errors="ignore") - try: - from app import _laravel_pairs_from_route_text # of waar je helper staat - except Exception: - _laravel_pairs_from_route_text = None - if _laravel_pairs_from_route_text: - for ctrl_path,_m in _laravel_pairs_from_route_text(txt): - if ctrl_path and ctrl_path not in picked and ctrl_path not in add: - add.append(ctrl_path) + for ctrl_path, _m in _laravel_pairs_from_route_text(txt): + if ctrl_path and ctrl_path not in picked and ctrl_path not in add: + add.append(ctrl_path) # controllers -> views if rel.startswith("app/Http/Controllers/") and rel.endswith(".php"): txt = (Path(st.repo_path)/rel).read_text(encoding="utf-8", errors="ignore") - try: - from app import _laravel_guess_view_paths_from_text - except Exception: - _laravel_guess_view_paths_from_text = None - if _laravel_guess_view_paths_from_text: - for v in _laravel_guess_view_paths_from_text(txt): - if v and v not in picked and v not in add: - add.append(v) + for v in _laravel_guess_view_paths_from_text(txt): + if v and v not in picked and v not in add: + add.append(v) # Extra: neem kleine nabije partials/layouts mee (zelfde dir, ≤40KB) more = [] for rel in (picked + add)[:8]: @@ -4410,12 +4558,10 @@ async def handle_repo_agent(messages: List[dict], request) -> str: pass picked = (picked + add + more)[:MAX_FILES_DRYRUN] # 5) Literal-grep fallback: als de user een oud->nieuw wijziging impliceert, zoek de 'old' literal repo-breed - try: - old, new, _why_pair = deduce_old_new_literals(st.user_goal, "") - except Exception: - old, new = None, None - if old and isinstance(old, str) and old.strip(): - grep_hits = _grep_repo_for_literal(Path(st.repo_path), old.strip(), limit=16) + qs = extract_quotes(st.user_goal) or [] + old = qs[0].strip() if qs and qs[0].strip() else None + if old: + grep_hits = 
_grep_repo_for_literal(Path(st.repo_path), old, limit=16) for rel in grep_hits: if rel in all_files and rel not in picked: picked.append(rel) @@ -4589,15 +4735,14 @@ async def handle_repo_agent(messages: List[dict], request) -> str: if best and best not in st.candidate_paths: added.append(best) st.candidate_paths = (added + st.candidate_paths)[:MAX_FILES_DRYRUN] # extra: grep op 'old' literal uit user_goal om kandidaten te verrijken - try: - old, new, _why_pair = deduce_old_new_literals(st.user_goal, "") - except Exception: - old = None + qs = extract_quotes(st.user_goal) or [] + old = qs[0].strip() if qs and qs[0].strip() else None if old: for rel in _grep_repo_for_literal(root, old, limit=16): if rel in all_files and rel not in st.candidate_paths: st.candidate_paths.append(rel) + try: proposed, diffs, reasons = await propose_patches_without_apply(st.repo_path, st.candidate_paths, st.user_goal) if not proposed: diff --git a/app.py b/app.py index d44c1ef..de778ed 100644 --- a/app.py +++ b/app.py @@ -32,7 +32,11 @@ from fastapi.routing import APIRoute from starlette.concurrency import run_in_threadpool from pydantic import BaseModel +AUTO_CONTINUE = os.getenv("AUTO_CONTINUE", "1").lower() not in ("0","false","no") +AUTO_CONTINUE_MAX_ROUNDS = int(os.getenv("AUTO_CONTINUE_MAX_ROUNDS", "6")) +AUTO_CONTINUE_TAIL_CHARS = int(os.getenv("AUTO_CONTINUE_TAIL_CHARS", "600")) +from llm_client import init_llm_client, _sync_model_infer # Optionele libs voor tekst-extractie try: @@ -71,6 +75,17 @@ except Exception: cairosvg = None import tempfile, subprocess # voor audio +import html # ← nieuw: unescape van HTML-entities in tool-call JSON + +# Forceer consistente caches zowel binnen als buiten Docker +os.environ.setdefault("HF_HOME", "/opt/hf") +os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/opt/hf") +os.environ.setdefault("TRANSFORMERS_CACHE", "/opt/hf") +os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers") +os.environ.setdefault("XDG_CACHE_HOME", 
"/opt/cache") +# whisper cache pad (gebruikt door faster-whisper) +os.environ.setdefault("WHISPER_CACHE_DIR", os.path.join(os.environ.get("XDG_CACHE_HOME","/opt/cache"), "whisper")) + # STT (Whisper-compatible via faster-whisper) — optioneel _STT_MODEL = None @@ -148,15 +163,53 @@ def _build_tools_system_prompt(tools: list) -> str: return "\n".join(lines) def _extract_tool_calls_from_text(txt: str): - # tolerant: pak eerste JSON object (ook als het in ```json staat) - m = re.search(r"\{[\s\S]*\}", txt or "") + s = (txt or "").strip() + # Strip code fences & bekende chat-tokens + if s.startswith("```"): + s = extract_code_block(s) + s = re.sub(r"^<\|im_start\|>\s*assistant\s*", "", s, flags=re.I) + # HTML-escaped JSON (" etc.) + try: + s = html.unescape(s) + except Exception: + pass + # tolerant: pak eerste JSON object + m = re.search(r"\{[\s\S]*\}", s) if not m: return [] try: obj = json.loads(m.group(0)) except Exception: return [] - tc = obj.get("tool_calls") or [] + # === Normaliseer verschillende dialecten === + # 1) OpenAI: {"tool_calls":[{"type":"function","function":{"name":..,"arguments":..}}]} + if "tool_calls" in obj and isinstance(obj["tool_calls"], list): + tc = obj["tool_calls"] + # 2) Replit/Magistral/Mistral-achtig: {"call_tool":{"name":"...","arguments":{...}}} óf lijst + elif "call_tool" in obj: + raw = obj["call_tool"] + if isinstance(raw, dict): + raw = [raw] + tc = [] + for it in (raw or []): + name = (it or {}).get("name") + args = (it or {}).get("arguments") or {} + if isinstance(args, str): + try: args = json.loads(args) + except Exception: pass + if name: + tc.append({"type":"function","function":{"name": name, "arguments": json.dumps(args, ensure_ascii=False)}}) + # 3) Legacy OpenAI: {"function_call":{"name":"...","arguments":"{...}"}} + elif "function_call" in obj and isinstance(obj["function_call"], dict): + fc = obj["function_call"] + name = fc.get("name") + args = fc.get("arguments") or {} + if isinstance(args, str): + try: args = 
json.loads(args) + except Exception: pass + tc = [{"type":"function","function":{"name": name, "arguments": json.dumps(args, ensure_ascii=False)}}] if name else [] + else: + tc = [] out = [] for c in tc: name = (c or {}).get("name") @@ -166,6 +219,13 @@ def _extract_tool_calls_from_text(txt: str): args = json.loads(args) except Exception: args = {} + # Support zowel {"name":..,"arguments":..} als {"type":"function","function":{...}} + if not name and isinstance(c.get("function"), dict): + name = c["function"].get("name") + args = c["function"].get("arguments") or args + if isinstance(args, str): + try: args = json.loads(args) + except Exception: args = {} if name: out.append({ "id": f"call_{uuid.uuid4().hex[:8]}", @@ -177,6 +237,46 @@ def _extract_tool_calls_from_text(txt: str): }) return out +# --- Universal toolcall detector (JSON + ReAct + HTML-escaped) --- +_TOOL_REACT = re.compile( + r"Action\s*:\s*([A-Za-z0-9_\-\.]+)\s*[\r\n]+Action\s*Input\s*:\s*(?:```json\s*([\s\S]*?)\s*```|([\s\S]*))", + re.I +) + +def detect_toolcalls_any(text: str) -> list[dict]: + """ + Retourneert altijd OpenAI-achtige tool_calls items: + [{"id":..., "type":"function", "function":{"name":..., "arguments": "{...}"}}] + """ + calls = _extract_tool_calls_from_text(text) + if calls: + return calls + s = (text or "").strip() + try: + s = html.unescape(s) + except Exception: + pass + # ReAct fallback + m = _TOOL_REACT.search(s) + if m: + name = (m.group(1) or "").strip() + raw = (m.group(2) or m.group(3) or "").strip() + # strip eventuele fences + if raw.startswith("```"): + raw = extract_code_block(raw) + try: + args = json.loads(raw) if raw else {} + except Exception: + # laatste redmiddel: wikkel als {"input": ""} + args = {"input": raw} + if name: + return [{ + "id": f"call_{uuid.uuid4().hex[:8]}", + "type": "function", + "function": {"name": name, "arguments": json.dumps(args, ensure_ascii=False)} + }] + return [] + # 
----------------------------------------------------------------------------- # App & logging # ----------------------------------------------------------------------------- @@ -197,6 +297,21 @@ app.add_middleware( allow_headers=["*"], ) + +# Eén centrale QueueManager voor ALLE LLM-calls +#app.state.LLM_QUEUE = QueueManager(model_infer_fn=_sync_model_infer) + +# Koppel deze queue aan llm_client zodat _llm_call dezelfde queue gebruikt +#init_llm_client(app.state.LLM_QUEUE) + +# Let op: +# - llm_call_openai_compat verwacht dat app.state.LLM_QUEUE een deque-achtige queue is +# (append/index/popleft/remove). +# - De QueueManager uit queue_helper.py is thread-based en wordt hier niet gebruikt. +# Als je QueueManager wilt inzetten, doe dat in llm_client.py of elders, +# maar niet als app.state.LLM_QUEUE, om type-conflicten te voorkomen. + + @app.on_event("startup") async def _startup(): # Zorg dat lokale hosts nooit via een proxy gaan @@ -697,7 +812,7 @@ async def llm_call_openai_compat( stream: bool = False, temperature: float = 0.2, top_p: float = 0.9, - max_tokens: int = 1024, + max_tokens: int = 13027, extra: Optional[dict] = None, stop: Optional[Union[str, list[str]]] = None, **kwargs @@ -926,7 +1041,7 @@ async def _svg_from_prompt(prompt: str, w: int, h: int, background: str="white") f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.") resp = await llm_call_openai_compat( [{"role":"system","content":sys},{"role":"user","content":user}], - stream=False, temperature=0.35, top_p=0.9, max_tokens=1200 + stream=False, temperature=0.35, top_p=0.9, max_tokens=2048 ) svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","") return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background) @@ -1079,6 +1194,165 @@ def _parse_repo_qa_from_messages(messages: list[dict]) -> tuple[Optional[str], s question = full return repo_hint, question, branch, n_ctx +# ------------------------------ +# Unified-diff "fast path" (expliciete paden + vervangtekst) 
+# ------------------------------ +_Q = r"[\"'“”‘’`]" +_PATH_PATS = [ + r"[\"“”'](resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)[\"”']", + r"(resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)", + r"[\"“”'](app\/[A-Za-z0-9_\/\.-]+\.php)[\"”']", + r"(app\/[A-Za-z0-9_\/\.-]+\.php)", +] + +def _extract_repo_branch(text: str) -> tuple[Optional[str], str]: + repo_url = None + branch = "main" + m = re.search(r"\bRepo\s*:\s*(\S+)", text, flags=re.I) + if m: + repo_url = m.group(1).strip() + mb = re.search(r"\bbranch\s*:\s*([A-Za-z0-9._/-]+)", text, flags=re.I) + if mb: + branch = mb.group(1).strip() + return repo_url, branch + +def _extract_paths(text: str) -> list[str]: + paths: set[str] = set() + for pat in _PATH_PATS: + for m in re.finditer(pat, text): + paths.add(m.group(1)) + return list(paths) + +def _extract_replace_pair(text: str) -> tuple[Optional[str], Optional[str]]: + """ + Haalt (old,new) uit NL/EN varianten. Ondersteunt rechte en ‘slimme’ quotes. + Voorbeelden die matchen: + - Vervang de tekst “A” in “B” + - Vervang de tekst "A" veranderen in "B" + - Replace the text 'A' to 'B' + - Replace "A" with "B" + """ + pats = [ + rf"Vervang\s+de\s+tekst\s*{_Q}(.+?){_Q}[^.\n]*?(?:in|naar|verander(?:en)?\s+in)\s*{_Q}(.+?){_Q}", + rf"Replace(?:\s+the)?\s+text\s*{_Q}(.+?){_Q}\s*(?:to|with)\s*{_Q}(.+?){_Q}", + ] + for p in pats: + m = re.search(p, text, flags=re.I|re.S) + if m: + return m.group(1), m.group(2) + # fallback: eerste twee quoted strings in de buurt van 'Vervang' / 'Replace' + mm = re.search(r"(Vervang|Replace)[\s\S]*?"+_Q+r"(.+?)"+_Q+r"[\s\S]*?"+_Q+r"(.+?)"+_Q, text, flags=re.I) + if mm: + return mm.group(2), mm.group(3) + return None, None + +def _looks_like_unified_diff_request(text: str) -> bool: + if re.search(r"\bunified\s+diff\b", text, flags=re.I): + return True + # ook accepteren bij "maak een diff" met expliciete bestanden + if re.search(r"\b(diff|patch)\b", text, flags=re.I) and any(re.search(p, text) for p in _PATH_PATS): + return True + return False + +def
_make_unified_diff(a_text: str, b_text: str, path: str) -> str: + import difflib + a_lines = a_text.splitlines(keepends=True) + b_lines = b_text.splitlines(keepends=True) + return "".join(difflib.unified_diff(a_lines, b_lines, fromfile=f"a/{path}", tofile=f"b/{path}", n=3)) + +_TRANS_WRAPPERS = [ + r"__\(\s*{q}({txt}){q}\s*\)".format(q=_Q, txt=r".+?"), + r"@lang\(\s*{q}({txt}){q}\s*\)".format(q=_Q, txt=r".+?"), + r"trans\(\s*{q}({txt}){q}\s*\)".format(q=_Q, txt=r".+?"), +] + +def _find_in_translation_wrapper(content: str, needle: str) -> bool: + for pat in _TRANS_WRAPPERS: + for m in re.finditer(pat, content): + inner = m.group(1) + if inner == needle: + return True + return False + +def _update_nl_json(root: Path, key: str, value: str) -> tuple[bool, str]: + """ + Zet of update resources/lang/nl.json: { key: value } + Retourneert (gewijzigd, nieuwe_tekst). + """ + lang_path = root / "resources" / "lang" / "nl.json" + data = {} + if lang_path.exists(): + try: + data = json.loads(lang_path.read_text(encoding="utf-8", errors="ignore") or "{}") + except Exception: + data = {} + # Alleen aanpassen als nodig + prev = data.get(key) + if prev == value: + return False, lang_path.as_posix() + data[key] = value + # Mooie, stabiele JSON dump + lang_path.parent.mkdir(parents=True, exist_ok=True) + tmp = lang_path.with_suffix(".tmp.json") + tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8") + os.replace(tmp, lang_path) + return True, lang_path.as_posix() + +async def _fast_unified_diff_task(messages: list[dict]) -> Optional[str]: + """ + Als de prompt vraagt om 'unified diff' met expliciete bestanden en vervangtekst, + voer dat direct uit (bypass handle_repo_agent) en geef diffs terug. 
+ """ + full = "\n".join([m.get("content","") for m in messages if m.get("role") in ("system","user")]) + if not _looks_like_unified_diff_request(full): + return None + repo_url, branch = _extract_repo_branch(full) + paths = _extract_paths(full) + old, new = _extract_replace_pair(full) + if not (repo_url and paths and old and new): + return None + + # Repo ophalen + repo_path = await _get_git_repo_async(repo_url, branch) + root = Path(repo_path) + changed_files: list[tuple[str, str]] = [] # (path, diff_text) + lang_changed = False + lang_path_text = "" + + for rel in paths: + p = root / rel + if not p.exists(): + continue + before = _read_text_file_wrapper(p) + if not before: + continue + + # Als de tekst binnen een vertaal-wrapper staat, wijzig NIET de blade, + # maar zet/patch resources/lang/nl.json: { "oude": "nieuwe" }. + if _find_in_translation_wrapper(before, old): + ch, lang_path_text = _update_nl_json(root, old, new) + lang_changed = lang_changed or ch + # geen bladewijziging in dit geval + continue + + after = before.replace(old, new) + if after == before: + continue + diff = _make_unified_diff(before, after, rel) + if diff.strip(): + changed_files.append((rel, diff)) + + if not changed_files and not lang_changed: + return "Dry-run: geen wijzigbare treffers gevonden in opgegeven bestanden (of reeds actueel)." 
+ + out = [] + if changed_files: + out.append("### Unified diffs") + for rel, d in changed_files: + out.append(f"```diff\n{d}```") + if lang_changed: + out.append(f"✅ Bijgewerkte vertaling in: `{lang_path_text}` → \"{old}\" → \"{new}\"") + return "\n\n".join(out).strip() # ------------------------------ # OpenAI-compatible endpoints @@ -1131,15 +1405,17 @@ def _get_stt_model(): except Exception: raise HTTPException(status_code=500, detail="STT niet beschikbaar: faster-whisper ontbreekt.") # device-selectie + download_root = os.getenv("STT_MODEL_DIR", os.environ.get("WHISPER_CACHE_DIR")) + os.makedirs(download_root, exist_ok=True) if STT_DEVICE == "auto": try: - _STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cuda", compute_type="float16") + _STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cuda", compute_type="float16", download_root=download_root) return _STT_MODEL except Exception: - _STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cpu", compute_type="int8") + _STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cpu", compute_type="int8", download_root=download_root) return _STT_MODEL dev, comp = ("cuda","float16") if STT_DEVICE=="cuda" else ("cpu","int8") - _STT_MODEL = WhisperModel(STT_MODEL_NAME, device=dev, compute_type=comp) + _STT_MODEL = WhisperModel(STT_MODEL_NAME, device=dev, compute_type=comp, download_root=download_root) return _STT_MODEL def _stt_transcribe_path(path: str, lang: str | None): @@ -1246,7 +1522,7 @@ async def present_make( f"Max. 
{max_slides} dia's, 3–6 bullets per dia.") plan = await llm_call_openai_compat( [{"role":"system","content":sys},{"role":"user","content":user}], - stream=False, temperature=0.3, top_p=0.9, max_tokens=1200 + stream=False, temperature=0.3, top_p=0.9, max_tokens=13021 ) raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}") try: @@ -1292,7 +1568,7 @@ async def vision_ask( stream: bool = Form(False), temperature: float = Form(0.2), top_p: float = Form(0.9), - max_tokens: int = Form(512), + max_tokens: int = Form(1024), ): raw = await run_in_threadpool(file.file.read) img_b64 = base64.b64encode(raw).decode("utf-8") @@ -1323,7 +1599,7 @@ async def vision_and_text( max_chars: int = Form(25000), temperature: float = Form(0.2), top_p: float = Form(0.9), - max_tokens: int = Form(1024), + max_tokens: int = Form(2048), ): images_b64: list[str] = [] text_chunks: list[str] = [] @@ -1370,7 +1646,7 @@ async def vision_health(): tiny_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAOaO5nYAAAAASUVORK5CYII=" try: messages = [{"role":"user","content":" Beschrijf dit in één woord."}] - resp = await llm_call_openai_compat(messages, extra={"images":[tiny_png]}, max_tokens=16) + resp = await llm_call_openai_compat(messages, extra={"images":[tiny_png]}, max_tokens=128) txt = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","").strip() return {"vision": bool(txt), "sample": txt[:60]} except Exception as e: @@ -1564,7 +1840,7 @@ async def _execute_tool(name: str, args: dict) -> dict: resp = await llm_call_openai_compat( [{"role":"system","content":"Je bent behulpzaam en exact."}, {"role":"user","content": f"{instruction}\n\n--- BEGIN ---\n{text}\n--- EINDE ---"}], - stream=False, max_tokens=768 + stream=False, max_tokens=7680 ) return resp @@ -1574,7 +1850,7 @@ async def _execute_tool(name: str, args: dict) -> dict: resp = await llm_call_openai_compat( [{"role":"system","content":"Wees feitelijk en concreet."}, 
{"role":"user","content": f"{goal}\n\n--- BEGIN ---\n{text}\n--- EINDE ---"}], - stream=False, max_tokens=768 + stream=False, max_tokens=7680 ) return resp @@ -1587,7 +1863,7 @@ async def _execute_tool(name: str, args: dict) -> dict: {"role":"user","content": f"{objective}\nStijl/criteria: {style}\n" "Antwoord met eerst een korte toelichting, daarna alleen de verbeterde inhoud tussen een codeblok.\n\n" f"--- BEGIN ---\n{text}\n--- EINDE ---"}], - stream=False, max_tokens=1024 + stream=False, max_tokens=10240 ) return resp @@ -1597,7 +1873,7 @@ async def _execute_tool(name: str, args: dict) -> dict: resp = await llm_call_openai_compat( [{"role":"system","content":"Wees strikt en concreet."}, {"role":"user","content": f"{VALIDATE_PROMPT}\n\nCode om te valideren:\n```\n{code}\n```"}], - stream=False, max_tokens=512 + stream=False, max_tokens=10000 ) txt = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","") return {"status":"issues_found" if _parse_validation_results(txt) else "valid", @@ -1611,7 +1887,7 @@ async def _execute_tool(name: str, args: dict) -> dict: [{"role":"system","content": SYSTEM_PROMPT}, {"role":"user","content": f"Verbeter deze {language}-code met focus op {focus}:\n\n" f"{code}\n\nGeef eerst een korte toelichting, dan alleen het verbeterde codeblok. Behoud functionaliteit."}], - stream=False, max_tokens=1536 + stream=False, max_tokens=10768 ) return resp @@ -1636,7 +1912,7 @@ async def _execute_tool(name: str, args: dict) -> dict: if name == "vision_analyze": image_url = args.get("image_url","") prompt = args.get("prompt","Beschrijf beknopt wat je ziet en noem de belangrijkste details.") - max_tokens = int(args.get("max_tokens",512)) + max_tokens = int(args.get("max_tokens",1024)) b64 = None # Alleen data: of raw base64 accepteren; http(s) niet, want die worden niet # ingeladen in de vision-call en zouden stil falen. 
@@ -1908,6 +2184,52 @@ async def _complete_with_autocontinue( continues += 1 return content, resp +async def llm_call_autocont( + messages: list[dict], + *, + model: Optional[str] = None, + stream: bool = False, + temperature: float = 0.2, + top_p: float = 0.9, + max_tokens: int = 1024, + extra: Optional[dict] = None, + stop: Optional[Union[str, list[str]]] = None, + **kwargs +) -> dict | StreamingResponse: + """ + OpenAI-compatibele wrapper die non-stream antwoorden automatisch + doorcontinueert met jouw _complete_with_autocontinue(...). + - stream=True -> passthrough naar llm_call_openai_compat (ongewijzigd) + - stream=False -> retourneert één samengevoegd antwoord als OpenAI JSON + """ + mdl = (model or os.getenv("LLM_MODEL", "mistral-medium")).strip() + if stream: + # streaming blijft 1-op-1 zoals voorheen + return await llm_call_openai_compat( + messages, + model=mdl, + stream=True, + temperature=temperature, + top_p=top_p, + max_tokens=max_tokens, + extra=extra if extra else None, + stop=stop + ) + + max_autocont = int(os.getenv("LLM_AUTO_CONTINUES", "2")) + full_text, _last = await _complete_with_autocontinue( + messages, + model=mdl, + temperature=temperature, + top_p=top_p, + max_tokens=max_tokens, + extra_payload=extra if extra else None, + max_autocont=max_autocont + ) + # Bouw een standaard OpenAI-chat response met het samengevoegde antwoord + return _openai_chat_response(mdl, full_text, messages) + + @app.post("/v1/chat/completions") async def openai_chat_completions(body: dict = Body(...), request: Request = None): model = (body.get("model") or os.getenv("LLM_MODEL", "mistral-medium")).strip() @@ -2053,7 +2375,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non # jouw bestaande helper; hou 'm zoals je al gebruikt resp = await llm_call_openai_compat(ask, stream=False, max_tokens=512) txt = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content", "") or "" - calls = _extract_tool_calls_from_text(txt) + 
calls = detect_toolcalls_any(txt) #_extract_tool_calls_from_text(txt) if calls: return { "id": f"chatcmpl-{uuid.uuid4().hex}", @@ -2075,6 +2397,17 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non # Speciale modellen if model == "repo-agent": + # === snelle bypass voor "unified diff" opdrachten met expliciete paden === + try: + fast = await _fast_unified_diff_task(messages) + except Exception as e: + fast = f"(Fast-diff pad mislukte: {e})" + if fast: + if stream: + async def _emit(): yield ("data: " + json.dumps({"id": f"chatcmpl-{uuid.uuid4().hex[:12]}", "object":"chat.completion.chunk","created": int(time.time()),"model":"repo-agent","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":None}]}) + "\n\n").encode("utf-8"); yield ("data: " + json.dumps({"id": f"chatcmpl-{uuid.uuid4().hex[:12]}","object":"chat.completion.chunk","created": int(time.time()),"model":"repo-agent","choices":[{"index":0,"delta":{"content": fast},"finish_reason":None}]}) + "\n\n").encode("utf-8"); yield b"data: [DONE]\n\n" + return StreamingResponse(_emit(), media_type="text/event-stream", headers={"Cache-Control":"no-cache, no-transform","X-Accel-Buffering":"no","Connection":"keep-alive"}) + return JSONResponse(_openai_chat_response("repo-agent", fast, messages)) + if stream: async def event_gen(): import asyncio, time, json, contextlib @@ -2421,7 +2754,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non if stream: temperature = float(body.get("temperature", 0.2)) top_p = float(body.get("top_p", 0.9)) - _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "1024")) + _default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "13021")) max_tokens = int(body.get("max_tokens", _default_max)) return await llm_call_openai_compat( messages, @@ -3331,7 +3664,7 @@ async def rag_query_api( resp = await llm_call_openai_compat( [{"role":"system","content":"You are precise and return only valid JSON."}, 
{"role":"user","content": prompt+"\n\nOnly JSON array."}], - stream=False, temperature=0.0, top_p=1.0, max_tokens=256 + stream=False, temperature=0.0, top_p=1.0, max_tokens=512 ) try: order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]")) @@ -3428,7 +3761,8 @@ initialize_agent( get_git_repo_fn=_get_git_repo_async, rag_index_repo_internal_fn=_rag_index_repo_internal, rag_query_internal_fn=_rag_query_internal, - llm_call_fn=llm_call_openai_compat, + # Gebruik auto-continue wrapper zodat agent-antwoorden niet worden afgekapt + llm_call_fn=llm_call_autocont, extract_code_block_fn=extract_code_block, read_text_file_fn=_read_text_file_wrapper, client_ip_fn=_client_ip, diff --git a/llm_client.py b/llm_client.py new file mode 100755 index 0000000..971b8b6 --- /dev/null +++ b/llm_client.py @@ -0,0 +1,131 @@ +from __future__ import annotations +import os +import asyncio +import logging +from typing import List, Dict, Any, Optional + +import httpx + +from queue_helper import QueueManager + +logger = logging.getLogger(__name__) + +# ------------------------------------------------------------- +# Config voor onderliggende LLM-backend +# ------------------------------------------------------------- +# Dit is NIET jouw eigen /v1/chat/completions endpoint, +# maar de *echte* model-backend (bijv. Ollama, vLLM, Mistral server, etc.). +LLM_API_BASE = os.getenv("LLM_API_BASE", "http://127.0.0.1:11434") +LLM_DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini") +LLM_REQUEST_TIMEOUT = float(os.getenv("LLM_REQUEST_TIMEOUT", "120")) + +# Deze wordt in app.py gezet via init_llm_client(...) +LLM_QUEUE: QueueManager | None = None + + +def init_llm_client(queue: QueueManager) -> None: + """ + Koppel de globale LLM_QUEUE aan de QueueManager uit app.py. + Deze MOET je in app.py één keer aanroepen. 
+ """ + global LLM_QUEUE + LLM_QUEUE = queue + logger.info("llm_client: LLM_QUEUE gekoppeld via init_llm_client.") + + +def _sync_model_infer(payload: Dict[str, Any]) -> Dict[str, Any]: + """ + Synchronous call naar de echte LLM-backend. + Dit is de functie die je in app.py gebruikt bij het maken van de QueueManager. + """ + url = f"{LLM_API_BASE.rstrip('/')}/v1/chat/completions" + try: + with httpx.Client(timeout=LLM_REQUEST_TIMEOUT) as client: + resp = client.post(url, json=payload) + resp.raise_for_status() + return resp.json() + except Exception as exc: + logger.exception("LLM backend call failed: %s", exc) + return { + "object": "chat.completion", + "choices": [{ + "index": 0, + "finish_reason": "error", + "message": { + "role": "assistant", + "content": f"[LLM-fout] {exc}", + }, + }], + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + }, + } + + +async def _llm_call( + messages: List[Dict[str, str]], + *, + stream: bool = False, + temperature: float = 0.2, + top_p: float = 0.9, + max_tokens: Optional[int] = None, + model: Optional[str] = None, + **extra: Any, +) -> Dict[str, Any]: + """ + Centrale helper voor tools/agents/smart_rag/repo-agent. + + Belangrijk: + - Gebruikt de *bestaande* QueueManager uit app.py (via init_llm_client). + - Stuurt jobs in de agent-queue (lagere prioriteit dan users). + - GEEN wachtrij-meldingen ("u bent #...") voor deze interne calls. + """ + if stream: + # In deze agent gebruiken we geen streaming. + raise NotImplementedError("_llm_call(stream=True) wordt momenteel niet ondersteund.") + + if LLM_QUEUE is None: + # Hard fail: dan weet je meteen dat init_llm_client nog niet is aangeroepen. + raise RuntimeError("LLM_QUEUE is niet geïnitialiseerd. Roep init_llm_client(...) 
aan in app.py") + + payload: Dict[str, Any] = { + "model": model or LLM_DEFAULT_MODEL, + "messages": messages, + "stream": False, + "temperature": float(temperature), + "top_p": float(top_p), + } + if max_tokens is not None: + payload["max_tokens"] = int(max_tokens) + + payload.update(extra) + + loop = asyncio.get_running_loop() + + try: + # request_agent_sync blokkeert → naar threadpool + response: Dict[str, Any] = await loop.run_in_executor( + None, lambda: LLM_QUEUE.request_agent_sync(payload) + ) + return response + except Exception as exc: + logger.exception("_llm_call via agent-queue failed: %s", exc) + return { + "object": "chat.completion", + "choices": [{ + "index": 0, + "finish_reason": "error", + "message": { + "role": "assistant", + "content": f"[LLM-queue-fout] {exc}", + }, + }], + "usage": { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + }, + } + diff --git a/mistral-api.sh b/mistral-api.sh new file mode 100755 index 0000000..1291b57 --- /dev/null +++ b/mistral-api.sh @@ -0,0 +1 @@ +docker run -d --name mistral-api --network host -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=1 -e LLM_CONTEXT_TOKENS=13021 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=2 -e LLM_FUNCTION_CALLING_MODE=shim -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e 
NO_PROXY=localhost,127.0.0.1,::1 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ mistral-api diff --git a/queue_helper.py b/queue_helper.py index 20caf87..f871f50 100644 --- a/queue_helper.py +++ b/queue_helper.py @@ -45,11 +45,20 @@ class QueueManager: self._worker.start() # ---------- public API ---------- - def enqueue_user(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> tuple[str, int]: + def enqueue_user( + self, + payload: Dict, + progress_cb: Callable[[Dict], None], + *, + notify_position: bool = False, + ) -> tuple[str, int]: job = _Job(payload, progress_cb) try: self._user_q.put_nowait(job) except queue.Full: raise RuntimeError(f"User‑queue vol (≥{USER_MAX_QUEUE})") position = self._user_q.qsize() + if notify_position: + # start een aparte notifier-thread die periodiek de wachtrijpositie meldt + start_position_notifier(job, self._user_q) return job.job_id, position def enqueue_agent(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> str: @@ -58,6 +67,32 @@ class QueueManager: except queue.Full: raise RuntimeError(f"Agent‑queue vol (≥{AGENT_MAX_QUEUE})") return job.job_id + # ---------- sync helper voor agents/tools ---------- + def request_agent_sync(self, payload: Dict, timeout: float = WORKER_TIMEOUT) -> Dict: + """ + Gebruik dit voor interne calls (agents/tools). + - Job wordt in de agent-queue gezet (lagere prioriteit dan users). + - We wachten blokkerend tot de worker klaar is of tot timeout. + - Er worden GEEN wachtrij-meldingen ("U bent #...") verstuurd. 
+ """ + result_box: Dict[str, Any] = {} + + def _cb(msg: Dict): + # alleen het eindresultaat is interessant voor tools/agents + result_box["answer"] = msg + + job = _Job(payload, _cb) + try: + self._agent_q.put_nowait(job) + except queue.Full: + raise RuntimeError(f"Agent-queue vol (≥{AGENT_MAX_QUEUE})") + + ok = job.event.wait(timeout) + if not ok: + raise TimeoutError(f"LLM-inference duurde langer dan {timeout} seconden.") + if job.error: + raise RuntimeError(job.error) + return result_box.get("answer") or {} # ---------- worker ---------- def _run_worker(self): while not self._shutdown.is_set(): @@ -79,18 +114,24 @@ class QueueManager: self._shutdown.set() self._worker.join(timeout=5) -def start_position_notifier(job: _Job, - queue_ref: queue.Queue, - interval: float = UPDATE_INTERVAL): +def start_position_notifier( + job: _Job, + queue_ref: queue.Queue, + interval: float = UPDATE_INTERVAL, + ): """Stuurt elke `interval` seconden een bericht met de huidige positie.""" def _notifier(): - while not job.event.is_set(): + # Stop zodra het job-event wordt gezet (success/fout/timeout upstream) + while not job.event.wait(interval): + # Neem een snapshot van de queue-inhoud op een thread-safe manier + with queue_ref.mutex: + snapshot = list(queue_ref.queue) try: - pos = list(queue_ref.queue).index(job) + 1 # 1‑based + pos = snapshot.index(job) + 1 # 1-based except ValueError: + # Job staat niet meer in de wachtrij → geen updates meer nodig break job.callback({"info": f"U bent #{pos} in de wachtrij. 
Even geduld…" }) - time.sleep(interval) t = threading.Thread(target=_notifier, daemon=True) t.start() return t diff --git a/smart_rag.py b/smart_rag.py index 05c3759..55c1c2d 100644 --- a/smart_rag.py +++ b/smart_rag.py @@ -296,7 +296,7 @@ async def hybrid_retrieve( # Optionele query-routing + RRF use_route = str(os.getenv("RAG_ROUTE", "1")).lower() not in ("0", "false") use_rrf = str(os.getenv("RAG_RRF", "1")).lower() not in ("0", "false") - # Optionele mini multi-query expansion (default uit) + # Optionele mini multi-query expansion (default aan) use_expand = str(os.getenv("RAG_MULTI_EXPAND", "1")).lower() in ("1","true","yes") k_variants = max(1, int(os.getenv("RAG_MULTI_K", "3"))) per_query_k = max(1, int(per_query_k)) @@ -317,10 +317,14 @@ async def hybrid_retrieve( if use_route: buckets = _route_query_buckets(qv) for b in buckets: + # combineer globale path_contains-hint met bucket-specifieke filter + pc = b.get("path_contains") + if path_contains and not pc: + pc = path_contains res = await rag_query_internal_fn( query=b["q"], n_results=per_query_k, collection_name=collection_name, - repo=repo, path_contains=b["path_contains"], profile=profile + repo=repo, path_contains=pc, profile=profile ) lst = [] for item in (res or {}).get("results", []): @@ -594,5 +598,7 @@ __all__ = [ "expand_queries", "hybrid_retrieve", "assemble_context", + "_laravel_pairs_from_route_text", + "_laravel_guess_view_paths_from_text", ] diff --git a/windowing_utils.py b/windowing_utils.py index a4354e0..81a144f 100644 --- a/windowing_utils.py +++ b/windowing_utils.py @@ -1,7 +1,7 @@ # windowing_utils.py from __future__ import annotations from dataclasses import dataclass, field -from typing import List, Dict, Callable, Optional, Tuple +from typing import List, Dict, Callable, Optional, Tuple, Awaitable import hashlib import os import time @@ -64,7 +64,7 @@ class ConversationWindow: async def build_within_budget( self, system_prompt: Optional[str], - summarizer: Optional[Callable[[str, 
List[Dict]], "awaitable[str]"]] = None + summarizer: Optional[Callable[[str, List[Dict]], Awaitable[str]]] = None ) -> List[Dict]: budget = self.max_ctx_tokens - max(1, self.response_reserve) working = self.history[:] @@ -106,6 +106,11 @@ class ConversationWindow: self.running_summary = await summarizer(self.running_summary, chunk_buf) chunk_buf = [] + # verwerk eventuele overgebleven buffer zodat er geen turns verdwijnen + if chunk_buf: + self.running_summary = await summarizer(self.running_summary, chunk_buf) + chunk_buf = [] + self.history = working return await build_candidate(self.running_summary, working)