RAG updates
This commit is contained in:
parent
8a97368577
commit
a79293abbd
56
Dockerfile
56
Dockerfile
@ -2,11 +2,31 @@ FROM python:3.11-slim
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# ===== Model caches op vaste paden (blijven in image) =====
|
||||||
|
# Hugging Face caches (embeddings) + XDG cache (o.a. whisper)
|
||||||
|
ENV HF_HOME=/opt/hf \
|
||||||
|
HUGGINGFACE_HUB_CACHE=/opt/hf \
|
||||||
|
TRANSFORMERS_CACHE=/opt/hf \
|
||||||
|
SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
|
||||||
|
XDG_CACHE_HOME=/opt/cache \
|
||||||
|
STT_MODEL=small
|
||||||
|
|
||||||
|
# Optioneel build-args om modelkeuzes te pinnen
|
||||||
|
ARG RAG_EMBEDDINGS=gte-multilingual # of: bge-small / e5-small / gte-base-en
|
||||||
|
ARG STT_MODEL_ARG=small # tiny | base | small | medium | large-v3, etc.
|
||||||
|
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS}
|
||||||
|
ENV STT_MODEL=${STT_MODEL_ARG}
|
||||||
|
|
||||||
|
# maak directories nu al aan (rechten)
|
||||||
|
RUN mkdir -p /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper && \
|
||||||
|
chmod -R a+rX /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper
|
||||||
|
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN apt-get update && apt-get -y install git curl ffmpeg libcairo2 libpango-1.0-0 libgdk-pixbuf2.0-0 apt-utils
|
RUN apt-get update && apt-get -y install git curl ffmpeg libcairo2 libpango-1.0-0 libgdk-pixbuf2.0-0 apt-utils pkg-config libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev libswscale-dev libswresample-dev build-essential
|
||||||
RUN pip install --upgrade pip
|
RUN pip install --upgrade pip
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
RUN pip install PyPDF2 python-multipart gitpython chromadb httpx meilisearch pandas openpyxl python-pptx faster-whisper==1.0.0 cairosvg sentence-transformers rank-bm25
|
RUN pip install PyPDF2 python-multipart gitpython chromadb httpx meilisearch pandas openpyxl python-pptx faster-whisper==1.0.0 cairosvg sentence-transformers rank-bm25
|
||||||
|
#RUN pip cache purge
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
wget ca-certificates libstdc++6 libatomic1 \
|
wget ca-certificates libstdc++6 libatomic1 \
|
||||||
@ -19,12 +39,46 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
ln -sf /opt/piper/piper /usr/local/bin/piper; \
|
ln -sf /opt/piper/piper /usr/local/bin/piper; \
|
||||||
rm -f /tmp/piper.tgz
|
rm -f /tmp/piper.tgz
|
||||||
|
|
||||||
|
# ===== Prefetch modellen tijdens de build =====
|
||||||
|
# 1) SentenceTransformers (embeddings) — volgens je mapping in app.py
|
||||||
|
RUN python - <<'PY'
|
||||||
|
import os
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
mapping = {
|
||||||
|
"gte-multilingual": ("Alibaba-NLP/gte-multilingual-base"),
|
||||||
|
"bge-small": ("BAAI/bge-small-en-v1.5"),
|
||||||
|
"e5-small": ("intfloat/e5-small-v2"),
|
||||||
|
"gte-base-en": ("thenlper/gte-base"),
|
||||||
|
}
|
||||||
|
choice = os.environ.get("RAG_EMBEDDINGS","gte-multilingual").lower()
|
||||||
|
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
|
||||||
|
# cache_folder respecteert SENTENCE_TRANSFORMERS_HOME/HF_HOME, maar we forceren expliciet:
|
||||||
|
SentenceTransformer(hf_id, cache_folder=os.environ.get("SENTENCE_TRANSFORMERS_HOME","/opt/sentence-transformers"))
|
||||||
|
print("Prefetched SentenceTransformer:", hf_id)
|
||||||
|
PY
|
||||||
|
|
||||||
|
# 2) faster-whisper (STT) — cache in /opt/cache/whisper
|
||||||
|
RUN python - <<'PY'
|
||||||
|
import os
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
name = os.environ.get("STT_MODEL","small")
|
||||||
|
cache_root = os.path.join(os.environ.get("XDG_CACHE_HOME","/opt/cache"), "whisper")
|
||||||
|
os.makedirs(cache_root, exist_ok=True)
|
||||||
|
# Build-time altijd CPU/INT8 (geen GPU nodig tijdens build)
|
||||||
|
_ = WhisperModel(name, device="cpu", compute_type="int8", download_root=cache_root)
|
||||||
|
print("Prefetched faster-whisper:", name, "->", cache_root)
|
||||||
|
PY
|
||||||
|
|
||||||
|
# (optioneel) piper voice kun je hier ook voorcachen; laat ik nu achterwege omdat voice per omgeving wisselt.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
COPY app.py .
|
COPY app.py .
|
||||||
COPY queue_helper.py .
|
COPY queue_helper.py .
|
||||||
COPY agent_repo.py .
|
COPY agent_repo.py .
|
||||||
COPY windowing_utils.py .
|
COPY windowing_utils.py .
|
||||||
COPY smart_rag.py .
|
COPY smart_rag.py .
|
||||||
|
COPY llm_client .
|
||||||
|
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|
||||||
|
|||||||
205
agent_repo.py
205
agent_repo.py
@ -18,7 +18,7 @@ from windowing_utils import approx_token_count
|
|||||||
from starlette.concurrency import run_in_threadpool
|
from starlette.concurrency import run_in_threadpool
|
||||||
import asyncio
|
import asyncio
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from llm_client import _llm_call
|
||||||
|
|
||||||
# --- Async I/O executors (voorkom event-loop blocking) ---
|
# --- Async I/O executors (voorkom event-loop blocking) ---
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
@ -59,7 +59,59 @@ _meili_search_fn = None
|
|||||||
_GRAPH_CACHE: dict[str, dict[str, set[str]]] = {}
|
_GRAPH_CACHE: dict[str, dict[str, set[str]]] = {}
|
||||||
_TREE_SUM_CACHE: dict[str, dict[str, str]] = {}
|
_TREE_SUM_CACHE: dict[str, dict[str, str]] = {}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Fast-path helpers: expliciete paden + vervangpaar (old->new)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
_Q = r"[\"'“”‘’`]"
|
||||||
|
_PATH_PATS = [
|
||||||
|
r"[\"“”'](resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)[\"”']",
|
||||||
|
r"(resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)",
|
||||||
|
r"[\"“”'](app\/[A-Za-z0-9_\/\.-]+\.php)[\"”']",
|
||||||
|
r"(app\/[A-Za-z0-9_\/\.-]+\.php)",
|
||||||
|
]
|
||||||
|
_TRANS_WRAPPERS = [
|
||||||
|
r"__\(\s*{q}(.+?){q}\s*\)".format(q=_Q),
|
||||||
|
r"@lang\(\s*{q}(.+?){q}\s*\)".format(q=_Q),
|
||||||
|
r"trans\(\s*{q}(.+?){q}\s*\)".format(q=_Q),
|
||||||
|
]
|
||||||
|
|
||||||
|
def _extract_repo_branch_from_text(txt: str) -> Tuple[Optional[str], str]:
|
||||||
|
repo_url, branch = None, "main"
|
||||||
|
m = re.search(r"\bRepo\s*:\s*(\S+)", txt, flags=re.I)
|
||||||
|
if m: repo_url = m.group(1).strip()
|
||||||
|
mb = re.search(r"\bbranch\s*:\s*([A-Za-z0-9._/-]+)", txt, flags=re.I)
|
||||||
|
if mb: branch = mb.group(1).strip()
|
||||||
|
return repo_url, branch
|
||||||
|
|
||||||
|
def _extract_explicit_paths(txt: str) -> List[str]:
|
||||||
|
out = []
|
||||||
|
for pat in _PATH_PATS:
|
||||||
|
for m in re.finditer(pat, txt):
|
||||||
|
p = m.group(1)
|
||||||
|
if p and p not in out:
|
||||||
|
out.append(p)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def _extract_replace_pair(txt: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
# NL/EN varianten + “slimme” quotes
|
||||||
|
pats = [
|
||||||
|
rf"Vervang\s+de\s+tekst\s*{_Q}(.+?){_Q}[^.\n]*?(?:in|naar|verander(?:en)?\s+in)\s*{_Q}(.+?){_Q}",
|
||||||
|
rf"Replace(?:\s+the)?\s+text\s*{_Q}(.+?){_Q}\s*(?:to|with)\s*{_Q}(.+?){_Q}",
|
||||||
|
]
|
||||||
|
for p in pats:
|
||||||
|
m = re.search(p, txt, flags=re.I|re.S)
|
||||||
|
if m:
|
||||||
|
return m.group(1), m.group(2)
|
||||||
|
mm = re.search(r"(Vervang|Replace)[\s\S]*?"+_Q+"(.+?)"+_Q+"[\s\S]*?"+_Q+"(.+?)"+_Q, txt, flags=re.I)
|
||||||
|
if mm:
|
||||||
|
return mm.group(2), mm.group(3)
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def _looks_like_unified_diff_request(txt: str) -> bool:
|
||||||
|
if re.search(r"\bunified\s+diff\b", txt, flags=re.I): return True
|
||||||
|
if re.search(r"\b(diff|patch)\b", txt, flags=re.I) and _extract_explicit_paths(txt):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
# zet dit dicht bij de andere module-consts
|
# zet dit dicht bij de andere module-consts
|
||||||
async def _call_get_git_repo(repo_url: str, branch: str):
|
async def _call_get_git_repo(repo_url: str, branch: str):
|
||||||
@ -2873,7 +2925,7 @@ def laravel_scan_routes(repo_root: Path) -> list[dict]:
|
|||||||
if not p.exists():
|
if not p.exists():
|
||||||
continue
|
continue
|
||||||
txt = _read_file_safe(p)
|
txt = _read_file_safe(p)
|
||||||
for m in re.finditer(r"Route::(get|post|put|patch|delete|match|resource)\s*\(\s*['\"]([^'\"]+)['\"]\s*,\s*([^)]+)\)", txt, flags=re.I):
|
for m in re.finditer(r"Route::(get|post|put|patch|delete|match)\s*\(\s*['\"]([^'\"]+)['\"]\s*,\s*([^)]+)\)", txt, flags=re.I):
|
||||||
verb, uri, target = m.group(1).lower(), m.group(2), m.group(3)
|
verb, uri, target = m.group(1).lower(), m.group(2), m.group(3)
|
||||||
ctrl = None; method = None; name = None
|
ctrl = None; method = None; name = None
|
||||||
# controller@method
|
# controller@method
|
||||||
@ -3926,13 +3978,122 @@ async def _llm_task_route(user_goal: str, framework: str = "laravel") -> dict:
|
|||||||
|
|
||||||
# ---------- Hoofd-handler ----------
|
# ---------- Hoofd-handler ----------
|
||||||
async def handle_repo_agent(messages: List[dict], request) -> str:
|
async def handle_repo_agent(messages: List[dict], request) -> str:
|
||||||
|
"""
|
||||||
|
Uitbreiding: fast-path voor unified diffs op expliciete bestanden met tekstvervanging.
|
||||||
|
Als niet van toepassing, valt automatisch terug op de bestaande flow.
|
||||||
|
"""
|
||||||
|
# 1) Combineer user/system content om opdracht te parsen
|
||||||
|
try:
|
||||||
|
full_txt = "\n".join([m.get("content","") for m in messages if m.get("role") in ("system","user")])
|
||||||
|
except Exception:
|
||||||
|
full_txt = ""
|
||||||
|
|
||||||
|
# 2) Herken fast-path
|
||||||
|
try_fast = _looks_like_unified_diff_request(full_txt)
|
||||||
|
paths_fp = _extract_explicit_paths(full_txt) if try_fast else []
|
||||||
|
old_txt, new_txt = _extract_replace_pair(full_txt) if try_fast else (None, None)
|
||||||
|
|
||||||
|
# NB: we gebruiken de injecties die via initialize_agent zijn gezet:
|
||||||
|
# - get_git_repo_fn (async)
|
||||||
|
# - read_text_file_fn (sync)
|
||||||
|
# Deze symbolen worden onderin initialize_agent aan globals() gehangen.
|
||||||
|
get_git_repo_fn = globals().get("get_git_repo_fn")
|
||||||
|
read_text_file_fn = globals().get("read_text_file_fn")
|
||||||
|
|
||||||
|
if try_fast and paths_fp and old_txt and new_txt and callable(get_git_repo_fn) and callable(read_text_file_fn):
|
||||||
|
# 3) repo + branch bepalen
|
||||||
|
repo_url, branch = _extract_repo_branch_from_text(full_txt)
|
||||||
|
if not repo_url:
|
||||||
|
# fallback: probeer repo uit eerdere agent-state (optioneel), anders stop fast-path
|
||||||
|
repo_url = globals().get("_last_repo_url")
|
||||||
|
branch = globals().get("_last_branch", "main")
|
||||||
|
if repo_url:
|
||||||
|
try:
|
||||||
|
repo_root = await get_git_repo_fn(repo_url, branch or "main")
|
||||||
|
root = Path(repo_root)
|
||||||
|
lang_path = root / "resources" / "lang" / "nl.json"
|
||||||
|
lang_before = lang_path.read_text(encoding="utf-8", errors="ignore") if lang_path.exists() else "{}"
|
||||||
|
lang_data = {}
|
||||||
|
try:
|
||||||
|
lang_data = json.loads(lang_before or "{}")
|
||||||
|
except Exception:
|
||||||
|
lang_data = {}
|
||||||
|
|
||||||
|
diffs_out = []
|
||||||
|
lang_changed = False
|
||||||
|
|
||||||
|
def _make_udiff(a: str, b: str, rel: str) -> str:
|
||||||
|
return "".join(difflib.unified_diff(
|
||||||
|
a.splitlines(keepends=True),
|
||||||
|
b.splitlines(keepends=True),
|
||||||
|
fromfile=f"a/{rel}", tofile=f"b/{rel}", n=3
|
||||||
|
))
|
||||||
|
|
||||||
|
# 4) per bestand: ofwel inline replace, ofwel vertaling bijwerken
|
||||||
|
for rel in paths_fp:
|
||||||
|
p = root / rel
|
||||||
|
if not p.exists():
|
||||||
|
continue
|
||||||
|
before = read_text_file_fn(p)
|
||||||
|
if not before:
|
||||||
|
continue
|
||||||
|
# Als de 'oude' tekst voorkomt BINNEN een vertaalwrapper, dan géén blade-edit
|
||||||
|
found_in_wrapper = False
|
||||||
|
for pat in _TRANS_WRAPPERS:
|
||||||
|
for m in re.finditer(pat, before):
|
||||||
|
inner = m.group(1)
|
||||||
|
if inner == old_txt:
|
||||||
|
found_in_wrapper = True
|
||||||
|
break
|
||||||
|
if found_in_wrapper:
|
||||||
|
break
|
||||||
|
if found_in_wrapper:
|
||||||
|
# update nl.json: {"oude": "nieuwe"}
|
||||||
|
if lang_data.get(old_txt) != new_txt:
|
||||||
|
lang_data[old_txt] = new_txt
|
||||||
|
lang_changed = True
|
||||||
|
continue
|
||||||
|
|
||||||
|
# anders: directe, exacte vervanging (conservatief)
|
||||||
|
after = before.replace(old_txt, new_txt)
|
||||||
|
if after != before:
|
||||||
|
diff = _make_udiff(before, after, rel)
|
||||||
|
if diff.strip():
|
||||||
|
diffs_out.append(("blade", rel, diff))
|
||||||
|
|
||||||
|
# 5) indien vertaling gewijzigd: diff voor nl.json toevoegen
|
||||||
|
if lang_changed:
|
||||||
|
new_lang = json.dumps(lang_data, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
|
||||||
|
diff_lang = _make_udiff(lang_before if isinstance(lang_before, str) else "", new_lang, "resources/lang/nl.json")
|
||||||
|
if diff_lang.strip():
|
||||||
|
diffs_out.append(("lang", "resources/lang/nl.json", diff_lang))
|
||||||
|
if diffs_out:
|
||||||
|
parts = ["### Unified diffs"]
|
||||||
|
for kind, rel, d in diffs_out:
|
||||||
|
parts.append(f"**{rel}**")
|
||||||
|
parts.append("```diff\n" + d + "```")
|
||||||
|
return "\n\n".join(parts)
|
||||||
|
else:
|
||||||
|
return "Dry-run: geen wijzigbare treffers gevonden in opgegeven bestanden (of reeds actueel)."
|
||||||
|
except Exception as e:
|
||||||
|
# mislukt → val terug op bestaande discover/agent flow
|
||||||
|
pass
|
||||||
|
|
||||||
|
# === GEEN fast-path → ga door met de bestaande flow hieronder ===
|
||||||
|
|
||||||
sid = _get_session_id(messages, request)
|
sid = _get_session_id(messages, request)
|
||||||
st = _app.state.AGENT_SESSIONS.get(sid) or AgentState()
|
st = _app.state.AGENT_SESSIONS.get(sid) or AgentState()
|
||||||
_app.state.AGENT_SESSIONS[sid] = st
|
_app.state.AGENT_SESSIONS[sid] = st
|
||||||
user_last = next((m["content"] for m in reversed(messages) if m.get("role")=="user"), "").strip()
|
user_last = next((m["content"] for m in reversed(messages) if m.get("role")=="user"), "").strip()
|
||||||
user_last_lower = user_last.lower()
|
user_last_lower = user_last.lower()
|
||||||
logger.info("INFO:agent_repo:[%s] stage=%s", sid, st.stage)
|
logger.info("INFO:agent_repo:[%s] stage=%s", sid, st.stage)
|
||||||
from smart_rag import enrich_intent, expand_queries, hybrid_retrieve
|
from smart_rag import (
|
||||||
|
enrich_intent,
|
||||||
|
expand_queries,
|
||||||
|
hybrid_retrieve,
|
||||||
|
_laravel_pairs_from_route_text,
|
||||||
|
_laravel_guess_view_paths_from_text,
|
||||||
|
)
|
||||||
# Als user een .git URL meegeeft: zet state en ga via de state-machine verder
|
# Als user een .git URL meegeeft: zet state en ga via de state-machine verder
|
||||||
user_txt = next((m.get("content","") for m in reversed(messages) if m.get("role")=="user"), "")
|
user_txt = next((m.get("content","") for m in reversed(messages) if m.get("role")=="user"), "")
|
||||||
repo_url = await _detect_repo_url(user_txt)
|
repo_url = await _detect_repo_url(user_txt)
|
||||||
@ -4287,16 +4448,13 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
|
|||||||
picked.append(f)
|
picked.append(f)
|
||||||
|
|
||||||
# --- VIEW/LANG bias voor UI-label wijzigingen ---
|
# --- VIEW/LANG bias voor UI-label wijzigingen ---
|
||||||
if task_type == "ui_label_change":
|
# Pak de eerste quote uit de prompt als "oude" literal
|
||||||
# Probeer de 'oude' literal uit de prompt te halen (voor gerichter filteren)
|
qs = extract_quotes(st.user_goal) or []
|
||||||
try:
|
old_lit = qs[0] if qs else None
|
||||||
old_lit, _new_lit, _why = deduce_old_new_literals(st.user_goal, "")
|
|
||||||
except Exception:
|
|
||||||
old_lit = None
|
|
||||||
|
|
||||||
def _contains_old(rel: str) -> bool:
|
def _contains_old(rel: str) -> bool:
|
||||||
if not old_lit:
|
if not old_lit:
|
||||||
return True
|
return True # fallback: geen filtering
|
||||||
try:
|
try:
|
||||||
txt = _read_text_file(Path(st.repo_path) / rel) or ""
|
txt = _read_text_file(Path(st.repo_path) / rel) or ""
|
||||||
return old_lit in txt
|
return old_lit in txt
|
||||||
@ -4374,22 +4532,12 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
|
|||||||
# routes -> controllers
|
# routes -> controllers
|
||||||
if rel in ("routes/web.php","routes/api.php"):
|
if rel in ("routes/web.php","routes/api.php"):
|
||||||
txt = (Path(st.repo_path)/rel).read_text(encoding="utf-8", errors="ignore")
|
txt = (Path(st.repo_path)/rel).read_text(encoding="utf-8", errors="ignore")
|
||||||
try:
|
|
||||||
from app import _laravel_pairs_from_route_text # of waar je helper staat
|
|
||||||
except Exception:
|
|
||||||
_laravel_pairs_from_route_text = None
|
|
||||||
if _laravel_pairs_from_route_text:
|
|
||||||
for ctrl_path, _m in _laravel_pairs_from_route_text(txt):
|
for ctrl_path, _m in _laravel_pairs_from_route_text(txt):
|
||||||
if ctrl_path and ctrl_path not in picked and ctrl_path not in add:
|
if ctrl_path and ctrl_path not in picked and ctrl_path not in add:
|
||||||
add.append(ctrl_path)
|
add.append(ctrl_path)
|
||||||
# controllers -> views
|
# controllers -> views
|
||||||
if rel.startswith("app/Http/Controllers/") and rel.endswith(".php"):
|
if rel.startswith("app/Http/Controllers/") and rel.endswith(".php"):
|
||||||
txt = (Path(st.repo_path)/rel).read_text(encoding="utf-8", errors="ignore")
|
txt = (Path(st.repo_path)/rel).read_text(encoding="utf-8", errors="ignore")
|
||||||
try:
|
|
||||||
from app import _laravel_guess_view_paths_from_text
|
|
||||||
except Exception:
|
|
||||||
_laravel_guess_view_paths_from_text = None
|
|
||||||
if _laravel_guess_view_paths_from_text:
|
|
||||||
for v in _laravel_guess_view_paths_from_text(txt):
|
for v in _laravel_guess_view_paths_from_text(txt):
|
||||||
if v and v not in picked and v not in add:
|
if v and v not in picked and v not in add:
|
||||||
add.append(v)
|
add.append(v)
|
||||||
@ -4410,12 +4558,10 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
|
|||||||
pass
|
pass
|
||||||
picked = (picked + add + more)[:MAX_FILES_DRYRUN]
|
picked = (picked + add + more)[:MAX_FILES_DRYRUN]
|
||||||
# 5) Literal-grep fallback: als de user een oud->nieuw wijziging impliceert, zoek de 'old' literal repo-breed
|
# 5) Literal-grep fallback: als de user een oud->nieuw wijziging impliceert, zoek de 'old' literal repo-breed
|
||||||
try:
|
qs = extract_quotes(st.user_goal) or []
|
||||||
old, new, _why_pair = deduce_old_new_literals(st.user_goal, "")
|
old = qs[0].strip() if qs and qs[0].strip() else None
|
||||||
except Exception:
|
if old:
|
||||||
old, new = None, None
|
grep_hits = _grep_repo_for_literal(Path(st.repo_path), old, limit=16)
|
||||||
if old and isinstance(old, str) and old.strip():
|
|
||||||
grep_hits = _grep_repo_for_literal(Path(st.repo_path), old.strip(), limit=16)
|
|
||||||
for rel in grep_hits:
|
for rel in grep_hits:
|
||||||
if rel in all_files and rel not in picked:
|
if rel in all_files and rel not in picked:
|
||||||
picked.append(rel)
|
picked.append(rel)
|
||||||
@ -4589,15 +4735,14 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
|
|||||||
if best and best not in st.candidate_paths: added.append(best)
|
if best and best not in st.candidate_paths: added.append(best)
|
||||||
st.candidate_paths = (added + st.candidate_paths)[:MAX_FILES_DRYRUN]
|
st.candidate_paths = (added + st.candidate_paths)[:MAX_FILES_DRYRUN]
|
||||||
# extra: grep op 'old' literal uit user_goal om kandidaten te verrijken
|
# extra: grep op 'old' literal uit user_goal om kandidaten te verrijken
|
||||||
try:
|
qs = extract_quotes(st.user_goal) or []
|
||||||
old, new, _why_pair = deduce_old_new_literals(st.user_goal, "")
|
old = qs[0].strip() if qs and qs[0].strip() else None
|
||||||
except Exception:
|
|
||||||
old = None
|
|
||||||
if old:
|
if old:
|
||||||
for rel in _grep_repo_for_literal(root, old, limit=16):
|
for rel in _grep_repo_for_literal(root, old, limit=16):
|
||||||
if rel in all_files and rel not in st.candidate_paths:
|
if rel in all_files and rel not in st.candidate_paths:
|
||||||
st.candidate_paths.append(rel)
|
st.candidate_paths.append(rel)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
proposed, diffs, reasons = await propose_patches_without_apply(st.repo_path, st.candidate_paths, st.user_goal)
|
proposed, diffs, reasons = await propose_patches_without_apply(st.repo_path, st.candidate_paths, st.user_goal)
|
||||||
if not proposed:
|
if not proposed:
|
||||||
|
|||||||
378
app.py
378
app.py
@ -32,7 +32,11 @@ from fastapi.routing import APIRoute
|
|||||||
from starlette.concurrency import run_in_threadpool
|
from starlette.concurrency import run_in_threadpool
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
AUTO_CONTINUE = os.getenv("AUTO_CONTINUE", "1").lower() not in ("0","false","no")
|
||||||
|
AUTO_CONTINUE_MAX_ROUNDS = int(os.getenv("AUTO_CONTINUE_MAX_ROUNDS", "6"))
|
||||||
|
AUTO_CONTINUE_TAIL_CHARS = int(os.getenv("AUTO_CONTINUE_TAIL_CHARS", "600"))
|
||||||
|
|
||||||
|
from llm_client import init_llm_client, _sync_model_infer
|
||||||
|
|
||||||
# Optionele libs voor tekst-extractie
|
# Optionele libs voor tekst-extractie
|
||||||
try:
|
try:
|
||||||
@ -71,6 +75,17 @@ except Exception:
|
|||||||
cairosvg = None
|
cairosvg = None
|
||||||
|
|
||||||
import tempfile, subprocess # voor audio
|
import tempfile, subprocess # voor audio
|
||||||
|
import html # ← nieuw: unescape van HTML-entities in tool-call JSON
|
||||||
|
|
||||||
|
# Forceer consistente caches zowel binnen als buiten Docker
|
||||||
|
os.environ.setdefault("HF_HOME", "/opt/hf")
|
||||||
|
os.environ.setdefault("HUGGINGFACE_HUB_CACHE", "/opt/hf")
|
||||||
|
os.environ.setdefault("TRANSFORMERS_CACHE", "/opt/hf")
|
||||||
|
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
|
||||||
|
os.environ.setdefault("XDG_CACHE_HOME", "/opt/cache")
|
||||||
|
# whisper cache pad (gebruikt door faster-whisper)
|
||||||
|
os.environ.setdefault("WHISPER_CACHE_DIR", os.path.join(os.environ.get("XDG_CACHE_HOME","/opt/cache"), "whisper"))
|
||||||
|
|
||||||
|
|
||||||
# STT (Whisper-compatible via faster-whisper) — optioneel
|
# STT (Whisper-compatible via faster-whisper) — optioneel
|
||||||
_STT_MODEL = None
|
_STT_MODEL = None
|
||||||
@ -148,15 +163,53 @@ def _build_tools_system_prompt(tools: list) -> str:
|
|||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
def _extract_tool_calls_from_text(txt: str):
|
def _extract_tool_calls_from_text(txt: str):
|
||||||
# tolerant: pak eerste JSON object (ook als het in ```json staat)
|
s = (txt or "").strip()
|
||||||
m = re.search(r"\{[\s\S]*\}", txt or "")
|
# Strip code fences & bekende chat-tokens
|
||||||
|
if s.startswith("```"):
|
||||||
|
s = extract_code_block(s)
|
||||||
|
s = re.sub(r"^<\|im_start\|>\s*assistant\s*", "", s, flags=re.I)
|
||||||
|
# HTML-escaped JSON (" etc.)
|
||||||
|
try:
|
||||||
|
s = html.unescape(s)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# tolerant: pak eerste JSON object
|
||||||
|
m = re.search(r"\{[\s\S]*\}", s)
|
||||||
if not m:
|
if not m:
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
obj = json.loads(m.group(0))
|
obj = json.loads(m.group(0))
|
||||||
except Exception:
|
except Exception:
|
||||||
return []
|
return []
|
||||||
tc = obj.get("tool_calls") or []
|
# === Normaliseer verschillende dialecten ===
|
||||||
|
# 1) OpenAI: {"tool_calls":[{"type":"function","function":{"name":..,"arguments":..}}]}
|
||||||
|
if "tool_calls" in obj and isinstance(obj["tool_calls"], list):
|
||||||
|
tc = obj["tool_calls"]
|
||||||
|
# 2) Replit/Magistral/Mistral-achtig: {"call_tool":{"name":"...","arguments":{...}}} óf lijst
|
||||||
|
elif "call_tool" in obj:
|
||||||
|
raw = obj["call_tool"]
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
raw = [raw]
|
||||||
|
tc = []
|
||||||
|
for it in (raw or []):
|
||||||
|
name = (it or {}).get("name")
|
||||||
|
args = (it or {}).get("arguments") or {}
|
||||||
|
if isinstance(args, str):
|
||||||
|
try: args = json.loads(args)
|
||||||
|
except Exception: pass
|
||||||
|
if name:
|
||||||
|
tc.append({"type":"function","function":{"name": name, "arguments": json.dumps(args, ensure_ascii=False)}})
|
||||||
|
# 3) Legacy OpenAI: {"function_call":{"name":"...","arguments":"{...}"}}
|
||||||
|
elif "function_call" in obj and isinstance(obj["function_call"], dict):
|
||||||
|
fc = obj["function_call"]
|
||||||
|
name = fc.get("name")
|
||||||
|
args = fc.get("arguments") or {}
|
||||||
|
if isinstance(args, str):
|
||||||
|
try: args = json.loads(args)
|
||||||
|
except Exception: pass
|
||||||
|
tc = [{"type":"function","function":{"name": name, "arguments": json.dumps(args, ensure_ascii=False)}}] if name else []
|
||||||
|
else:
|
||||||
|
tc = []
|
||||||
out = []
|
out = []
|
||||||
for c in tc:
|
for c in tc:
|
||||||
name = (c or {}).get("name")
|
name = (c or {}).get("name")
|
||||||
@ -166,6 +219,13 @@ def _extract_tool_calls_from_text(txt: str):
|
|||||||
args = json.loads(args)
|
args = json.loads(args)
|
||||||
except Exception:
|
except Exception:
|
||||||
args = {}
|
args = {}
|
||||||
|
# Support zowel {"name":..,"arguments":..} als {"type":"function","function":{...}}
|
||||||
|
if not name and isinstance(c.get("function"), dict):
|
||||||
|
name = c["function"].get("name")
|
||||||
|
args = c["function"].get("arguments") or args
|
||||||
|
if isinstance(args, str):
|
||||||
|
try: args = json.loads(args)
|
||||||
|
except Exception: args = {}
|
||||||
if name:
|
if name:
|
||||||
out.append({
|
out.append({
|
||||||
"id": f"call_{uuid.uuid4().hex[:8]}",
|
"id": f"call_{uuid.uuid4().hex[:8]}",
|
||||||
@ -177,6 +237,46 @@ def _extract_tool_calls_from_text(txt: str):
|
|||||||
})
|
})
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
# --- Universal toolcall detector (JSON + ReAct + HTML-escaped) ---
|
||||||
|
_TOOL_REACT = re.compile(
|
||||||
|
r"Action\s*:\s*([A-Za-z0-9_\-\.]+)\s*[\r\n]+Action\s*Input\s*:\s*(?:```json\s*([\s\S]*?)\s*```|([\s\S]*))",
|
||||||
|
re.I
|
||||||
|
)
|
||||||
|
|
||||||
|
def detect_toolcalls_any(text: str) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Retourneert altijd OpenAI-achtige tool_calls items:
|
||||||
|
[{"id":..., "type":"function", "function":{"name":..., "arguments": "{...}"}}]
|
||||||
|
"""
|
||||||
|
calls = _extract_tool_calls_from_text(text)
|
||||||
|
if calls:
|
||||||
|
return calls
|
||||||
|
s = (text or "").strip()
|
||||||
|
try:
|
||||||
|
s = html.unescape(s)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
# ReAct fallback
|
||||||
|
m = _TOOL_REACT.search(s)
|
||||||
|
if m:
|
||||||
|
name = (m.group(1) or "").strip()
|
||||||
|
raw = (m.group(2) or m.group(3) or "").strip()
|
||||||
|
# strip eventuele fences
|
||||||
|
if raw.startswith("```"):
|
||||||
|
raw = extract_code_block(raw)
|
||||||
|
try:
|
||||||
|
args = json.loads(raw) if raw else {}
|
||||||
|
except Exception:
|
||||||
|
# laatste redmiddel: wikkel als {"input": "<raw>"}
|
||||||
|
args = {"input": raw}
|
||||||
|
if name:
|
||||||
|
return [{
|
||||||
|
"id": f"call_{uuid.uuid4().hex[:8]}",
|
||||||
|
"type": "function",
|
||||||
|
"function": {"name": name, "arguments": json.dumps(args, ensure_ascii=False)}
|
||||||
|
}]
|
||||||
|
return []
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# App & logging
|
# App & logging
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
@ -197,6 +297,21 @@ app.add_middleware(
|
|||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Eén centrale QueueManager voor ALLE LLM-calls
|
||||||
|
#app.state.LLM_QUEUE = QueueManager(model_infer_fn=_sync_model_infer)
|
||||||
|
|
||||||
|
# Koppel deze queue aan llm_client zodat _llm_call dezelfde queue gebruikt
|
||||||
|
#init_llm_client(app.state.LLM_QUEUE)
|
||||||
|
|
||||||
|
# Let op:
|
||||||
|
# - llm_call_openai_compat verwacht dat app.state.LLM_QUEUE een deque-achtige queue is
|
||||||
|
# (append/index/popleft/remove).
|
||||||
|
# - De QueueManager uit queue_helper.py is thread-based en wordt hier niet gebruikt.
|
||||||
|
# Als je QueueManager wilt inzetten, doe dat in llm_client.py of elders,
|
||||||
|
# maar niet als app.state.LLM_QUEUE, om type-conflicten te voorkomen.
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def _startup():
|
async def _startup():
|
||||||
# Zorg dat lokale hosts nooit via een proxy gaan
|
# Zorg dat lokale hosts nooit via een proxy gaan
|
||||||
@ -697,7 +812,7 @@ async def llm_call_openai_compat(
|
|||||||
stream: bool = False,
|
stream: bool = False,
|
||||||
temperature: float = 0.2,
|
temperature: float = 0.2,
|
||||||
top_p: float = 0.9,
|
top_p: float = 0.9,
|
||||||
max_tokens: int = 1024,
|
max_tokens: int = 13027,
|
||||||
extra: Optional[dict] = None,
|
extra: Optional[dict] = None,
|
||||||
stop: Optional[Union[str, list[str]]] = None,
|
stop: Optional[Union[str, list[str]]] = None,
|
||||||
**kwargs
|
**kwargs
|
||||||
@ -926,7 +1041,7 @@ async def _svg_from_prompt(prompt: str, w: int, h: int, background: str="white")
|
|||||||
f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.")
|
f"- Thema: {prompt}\n- Gebruik eenvoudige vormen/paths/tekst.")
|
||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":sys},{"role":"user","content":user}],
|
[{"role":"system","content":sys},{"role":"user","content":user}],
|
||||||
stream=False, temperature=0.35, top_p=0.9, max_tokens=1200
|
stream=False, temperature=0.35, top_p=0.9, max_tokens=2048
|
||||||
)
|
)
|
||||||
svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
|
svg = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
|
||||||
return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background)
|
return _svg_wrap_if_needed(_sanitize_svg(svg), w, h, background)
|
||||||
@ -1079,6 +1194,165 @@ def _parse_repo_qa_from_messages(messages: list[dict]) -> tuple[Optional[str], s
|
|||||||
question = full
|
question = full
|
||||||
return repo_hint, question, branch, n_ctx
|
return repo_hint, question, branch, n_ctx
|
||||||
|
|
||||||
|
# ------------------------------
|
||||||
|
# Unified-diff "fast path" (expliciete paden + vervangtekst)
|
||||||
|
# ------------------------------
|
||||||
|
_Q = r"[\"'“”‘’`]"
|
||||||
|
_PATH_PATS = [
|
||||||
|
r"[\"“”'](resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)[\"”']",
|
||||||
|
r"(resources\/[A-Za-z0-9_\/\.-]+\.blade\.php)",
|
||||||
|
r"[\"“”'](app\/[A-Za-z0-9_\/\.-]+\.php)[\"”']",
|
||||||
|
r"(app\/[A-Za-z0-9_\/\.-]+\.php)",
|
||||||
|
]
|
||||||
|
|
||||||
|
def _extract_repo_branch(text: str) -> tuple[Optional[str], str]:
|
||||||
|
repo_url = None
|
||||||
|
branch = "main"
|
||||||
|
m = re.search(r"\bRepo\s*:\s*(\S+)", text, flags=re.I)
|
||||||
|
if m:
|
||||||
|
repo_url = m.group(1).strip()
|
||||||
|
mb = re.search(r"\bbranch\s*:\s*([A-Za-z0-9._/-]+)", text, flags=re.I)
|
||||||
|
if mb:
|
||||||
|
branch = mb.group(1).strip()
|
||||||
|
return repo_url, branch
|
||||||
|
|
||||||
|
def _extract_paths(text: str) -> list[str]:
|
||||||
|
paths: set[str] = set()
|
||||||
|
for pat in _PATH_PATS:
|
||||||
|
for m in re.finditer(pat, text):
|
||||||
|
paths.add(m.group(1))
|
||||||
|
return list(paths)
|
||||||
|
|
||||||
|
def _extract_replace_pair(text: str) -> tuple[Optional[str], Optional[str]]:
|
||||||
|
"""
|
||||||
|
Haalt (old,new) uit NL/EN varianten. Ondersteunt rechte en ‘slimme’ quotes.
|
||||||
|
Voorbeelden die matchen:
|
||||||
|
- Vervang de tekst “A” in “B”
|
||||||
|
- Vervang de tekst "A" veranderen in "B"
|
||||||
|
- Replace the text 'A' to 'B'
|
||||||
|
- Replace "A" with "B"
|
||||||
|
"""
|
||||||
|
pats = [
|
||||||
|
rf"Vervang\s+de\s+tekst\s*{_Q}(.+?){_Q}[^.\n]*?(?:in|naar|verander(?:en)?\s+in)\s*{_Q}(.+?){_Q}",
|
||||||
|
rf"Replace(?:\s+the)?\s+text\s*{_Q}(.+?){_Q}\s*(?:to|with)\s*{_Q}(.+?){_Q}",
|
||||||
|
]
|
||||||
|
for p in pats:
|
||||||
|
m = re.search(p, text, flags=re.I|re.S)
|
||||||
|
if m:
|
||||||
|
return m.group(1), m.group(2)
|
||||||
|
# fallback: eerste twee quoted strings in de buurt van 'Vervang' / 'Replace'
|
||||||
|
mm = re.search(r"(Vervang|Replace)[\s\S]*?"+_Q+"(.+?)"+_Q+"[\s\S]*?"+_Q+"(.+?)"+_Q, text, flags=re.I)
|
||||||
|
if mm:
|
||||||
|
return mm.group(2), mm.group(3)
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
def _looks_like_unified_diff_request(text: str) -> bool:
|
||||||
|
if re.search(r"\bunified\s+diff\b", text, flags=re.I):
|
||||||
|
return True
|
||||||
|
# ook accepteren bij "maak een diff" met expliciete bestanden
|
||||||
|
if re.search(r"\b(diff|patch)\b", text, flags=re.I) and any(re.search(p, text) for p in _PATH_PATS):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _make_unified_diff(a_text: str, b_text: str, path: str) -> str:
|
||||||
|
import difflib
|
||||||
|
a_lines = a_text.splitlines(keepends=True)
|
||||||
|
b_lines = b_text.splitlines(keepends=True)
|
||||||
|
return "".join(difflib.unified_diff(a_lines, b_lines, fromfile=f"a/{path}", tofile=f"b/{path}", n=3))
|
||||||
|
|
||||||
|
_TRANS_WRAPPERS = [
|
||||||
|
r"__\(\s*{q}({txt}){q}\s*\)".format(q=_Q, txt=r".+?"),
|
||||||
|
r"@lang\(\s*{q}({txt}){q}\s*\)".format(q=_Q, txt=r".+?"),
|
||||||
|
r"trans\(\s*{q}({txt}){q}\s*\)".format(q=_Q, txt=r".+?"),
|
||||||
|
]
|
||||||
|
|
||||||
|
def _find_in_translation_wrapper(content: str, needle: str) -> bool:
|
||||||
|
for pat in _TRANS_WRAPPERS:
|
||||||
|
for m in re.finditer(pat, content):
|
||||||
|
inner = m.group(1)
|
||||||
|
if inner == needle:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _update_nl_json(root: Path, key: str, value: str) -> tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Zet of update resources/lang/nl.json: { key: value }
|
||||||
|
Retourneert (gewijzigd, nieuwe_tekst).
|
||||||
|
"""
|
||||||
|
lang_path = root / "resources" / "lang" / "nl.json"
|
||||||
|
data = {}
|
||||||
|
if lang_path.exists():
|
||||||
|
try:
|
||||||
|
data = json.loads(lang_path.read_text(encoding="utf-8", errors="ignore") or "{}")
|
||||||
|
except Exception:
|
||||||
|
data = {}
|
||||||
|
# Alleen aanpassen als nodig
|
||||||
|
prev = data.get(key)
|
||||||
|
if prev == value:
|
||||||
|
return False, lang_path.as_posix()
|
||||||
|
data[key] = value
|
||||||
|
# Mooie, stabiele JSON dump
|
||||||
|
lang_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
tmp = lang_path.with_suffix(".tmp.json")
|
||||||
|
tmp.write_text(json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
|
||||||
|
os.replace(tmp, lang_path)
|
||||||
|
return True, lang_path.as_posix()
|
||||||
|
|
||||||
|
async def _fast_unified_diff_task(messages: list[dict]) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Als de prompt vraagt om 'unified diff' met expliciete bestanden en vervangtekst,
|
||||||
|
voer dat direct uit (bypass handle_repo_agent) en geef diffs terug.
|
||||||
|
"""
|
||||||
|
full = "\n".join([m.get("content","") for m in messages if m.get("role") in ("system","user")])
|
||||||
|
if not _looks_like_unified_diff_request(full):
|
||||||
|
return None
|
||||||
|
repo_url, branch = _extract_repo_branch(full)
|
||||||
|
paths = _extract_paths(full)
|
||||||
|
old, new = _extract_replace_pair(full)
|
||||||
|
if not (repo_url and paths and old and new):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Repo ophalen
|
||||||
|
repo_path = await _get_git_repo_async(repo_url, branch)
|
||||||
|
root = Path(repo_path)
|
||||||
|
changed_files: list[tuple[str, str]] = [] # (path, diff_text)
|
||||||
|
lang_changed = False
|
||||||
|
lang_path_text = ""
|
||||||
|
|
||||||
|
for rel in paths:
|
||||||
|
p = root / rel
|
||||||
|
if not p.exists():
|
||||||
|
continue
|
||||||
|
before = _read_text_file_wrapper(p)
|
||||||
|
if not before:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Als de tekst binnen een vertaal-wrapper staat, wijzig NIET de blade,
|
||||||
|
# maar zet/patch resources/lang/nl.json: { "oude": "nieuwe" }.
|
||||||
|
if _find_in_translation_wrapper(before, old):
|
||||||
|
ch, lang_path_text = _update_nl_json(root, old, new)
|
||||||
|
lang_changed = lang_changed or ch
|
||||||
|
# geen bladewijziging in dit geval
|
||||||
|
continue
|
||||||
|
|
||||||
|
after = before.replace(old, new)
|
||||||
|
if after == before:
|
||||||
|
continue
|
||||||
|
diff = _make_unified_diff(before, after, rel)
|
||||||
|
if diff.strip():
|
||||||
|
changed_files.append((rel, diff))
|
||||||
|
|
||||||
|
if not changed_files and not lang_changed:
|
||||||
|
return "Dry-run: geen wijzigbare treffers gevonden in opgegeven bestanden (of reeds actueel)."
|
||||||
|
|
||||||
|
out = []
|
||||||
|
if changed_files:
|
||||||
|
out.append("### Unified diffs")
|
||||||
|
for rel, d in changed_files:
|
||||||
|
out.append(f"```diff\n{d}```")
|
||||||
|
if lang_changed:
|
||||||
|
out.append(f"✅ Bijgewerkte vertaling in: `{lang_path_text}` → \"{old}\" → \"{new}\"")
|
||||||
|
return "\n\n".join(out).strip()
|
||||||
|
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
# OpenAI-compatible endpoints
|
# OpenAI-compatible endpoints
|
||||||
@ -1131,15 +1405,17 @@ def _get_stt_model():
|
|||||||
except Exception:
|
except Exception:
|
||||||
raise HTTPException(status_code=500, detail="STT niet beschikbaar: faster-whisper ontbreekt.")
|
raise HTTPException(status_code=500, detail="STT niet beschikbaar: faster-whisper ontbreekt.")
|
||||||
# device-selectie
|
# device-selectie
|
||||||
|
download_root = os.getenv("STT_MODEL_DIR", os.environ.get("WHISPER_CACHE_DIR"))
|
||||||
|
os.makedirs(download_root, exist_ok=True)
|
||||||
if STT_DEVICE == "auto":
|
if STT_DEVICE == "auto":
|
||||||
try:
|
try:
|
||||||
_STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cuda", compute_type="float16")
|
_STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cuda", compute_type="float16", download_root=download_root)
|
||||||
return _STT_MODEL
|
return _STT_MODEL
|
||||||
except Exception:
|
except Exception:
|
||||||
_STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cpu", compute_type="int8")
|
_STT_MODEL = WhisperModel(STT_MODEL_NAME, device="cpu", compute_type="int8", download_root=download_root)
|
||||||
return _STT_MODEL
|
return _STT_MODEL
|
||||||
dev, comp = ("cuda","float16") if STT_DEVICE=="cuda" else ("cpu","int8")
|
dev, comp = ("cuda","float16") if STT_DEVICE=="cuda" else ("cpu","int8")
|
||||||
_STT_MODEL = WhisperModel(STT_MODEL_NAME, device=dev, compute_type=comp)
|
_STT_MODEL = WhisperModel(STT_MODEL_NAME, device=dev, compute_type=comp, download_root=download_root)
|
||||||
return _STT_MODEL
|
return _STT_MODEL
|
||||||
|
|
||||||
def _stt_transcribe_path(path: str, lang: str | None):
|
def _stt_transcribe_path(path: str, lang: str | None):
|
||||||
@ -1246,7 +1522,7 @@ async def present_make(
|
|||||||
f"Max. {max_slides} dia's, 3–6 bullets per dia.")
|
f"Max. {max_slides} dia's, 3–6 bullets per dia.")
|
||||||
plan = await llm_call_openai_compat(
|
plan = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":sys},{"role":"user","content":user}],
|
[{"role":"system","content":sys},{"role":"user","content":user}],
|
||||||
stream=False, temperature=0.3, top_p=0.9, max_tokens=1200
|
stream=False, temperature=0.3, top_p=0.9, max_tokens=13021
|
||||||
)
|
)
|
||||||
raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}")
|
raw = (plan.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}")
|
||||||
try:
|
try:
|
||||||
@ -1292,7 +1568,7 @@ async def vision_ask(
|
|||||||
stream: bool = Form(False),
|
stream: bool = Form(False),
|
||||||
temperature: float = Form(0.2),
|
temperature: float = Form(0.2),
|
||||||
top_p: float = Form(0.9),
|
top_p: float = Form(0.9),
|
||||||
max_tokens: int = Form(512),
|
max_tokens: int = Form(1024),
|
||||||
):
|
):
|
||||||
raw = await run_in_threadpool(file.file.read)
|
raw = await run_in_threadpool(file.file.read)
|
||||||
img_b64 = base64.b64encode(raw).decode("utf-8")
|
img_b64 = base64.b64encode(raw).decode("utf-8")
|
||||||
@ -1323,7 +1599,7 @@ async def vision_and_text(
|
|||||||
max_chars: int = Form(25000),
|
max_chars: int = Form(25000),
|
||||||
temperature: float = Form(0.2),
|
temperature: float = Form(0.2),
|
||||||
top_p: float = Form(0.9),
|
top_p: float = Form(0.9),
|
||||||
max_tokens: int = Form(1024),
|
max_tokens: int = Form(2048),
|
||||||
):
|
):
|
||||||
images_b64: list[str] = []
|
images_b64: list[str] = []
|
||||||
text_chunks: list[str] = []
|
text_chunks: list[str] = []
|
||||||
@ -1370,7 +1646,7 @@ async def vision_health():
|
|||||||
tiny_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAOaO5nYAAAAASUVORK5CYII="
|
tiny_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAOaO5nYAAAAASUVORK5CYII="
|
||||||
try:
|
try:
|
||||||
messages = [{"role":"user","content":"<image> Beschrijf dit in één woord."}]
|
messages = [{"role":"user","content":"<image> Beschrijf dit in één woord."}]
|
||||||
resp = await llm_call_openai_compat(messages, extra={"images":[tiny_png]}, max_tokens=16)
|
resp = await llm_call_openai_compat(messages, extra={"images":[tiny_png]}, max_tokens=128)
|
||||||
txt = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","").strip()
|
txt = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","").strip()
|
||||||
return {"vision": bool(txt), "sample": txt[:60]}
|
return {"vision": bool(txt), "sample": txt[:60]}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -1564,7 +1840,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":"Je bent behulpzaam en exact."},
|
[{"role":"system","content":"Je bent behulpzaam en exact."},
|
||||||
{"role":"user","content": f"{instruction}\n\n--- BEGIN ---\n{text}\n--- EINDE ---"}],
|
{"role":"user","content": f"{instruction}\n\n--- BEGIN ---\n{text}\n--- EINDE ---"}],
|
||||||
stream=False, max_tokens=768
|
stream=False, max_tokens=7680
|
||||||
)
|
)
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
@ -1574,7 +1850,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":"Wees feitelijk en concreet."},
|
[{"role":"system","content":"Wees feitelijk en concreet."},
|
||||||
{"role":"user","content": f"{goal}\n\n--- BEGIN ---\n{text}\n--- EINDE ---"}],
|
{"role":"user","content": f"{goal}\n\n--- BEGIN ---\n{text}\n--- EINDE ---"}],
|
||||||
stream=False, max_tokens=768
|
stream=False, max_tokens=7680
|
||||||
)
|
)
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
@ -1587,7 +1863,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
{"role":"user","content": f"{objective}\nStijl/criteria: {style}\n"
|
{"role":"user","content": f"{objective}\nStijl/criteria: {style}\n"
|
||||||
"Antwoord met eerst een korte toelichting, daarna alleen de verbeterde inhoud tussen een codeblok.\n\n"
|
"Antwoord met eerst een korte toelichting, daarna alleen de verbeterde inhoud tussen een codeblok.\n\n"
|
||||||
f"--- BEGIN ---\n{text}\n--- EINDE ---"}],
|
f"--- BEGIN ---\n{text}\n--- EINDE ---"}],
|
||||||
stream=False, max_tokens=1024
|
stream=False, max_tokens=10240
|
||||||
)
|
)
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
@ -1597,7 +1873,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":"Wees strikt en concreet."},
|
[{"role":"system","content":"Wees strikt en concreet."},
|
||||||
{"role":"user","content": f"{VALIDATE_PROMPT}\n\nCode om te valideren:\n```\n{code}\n```"}],
|
{"role":"user","content": f"{VALIDATE_PROMPT}\n\nCode om te valideren:\n```\n{code}\n```"}],
|
||||||
stream=False, max_tokens=512
|
stream=False, max_tokens=10000
|
||||||
)
|
)
|
||||||
txt = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
|
txt = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","")
|
||||||
return {"status":"issues_found" if _parse_validation_results(txt) else "valid",
|
return {"status":"issues_found" if _parse_validation_results(txt) else "valid",
|
||||||
@ -1611,7 +1887,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
[{"role":"system","content": SYSTEM_PROMPT},
|
[{"role":"system","content": SYSTEM_PROMPT},
|
||||||
{"role":"user","content": f"Verbeter deze {language}-code met focus op {focus}:\n\n"
|
{"role":"user","content": f"Verbeter deze {language}-code met focus op {focus}:\n\n"
|
||||||
f"{code}\n\nGeef eerst een korte toelichting, dan alleen het verbeterde codeblok. Behoud functionaliteit."}],
|
f"{code}\n\nGeef eerst een korte toelichting, dan alleen het verbeterde codeblok. Behoud functionaliteit."}],
|
||||||
stream=False, max_tokens=1536
|
stream=False, max_tokens=10768
|
||||||
)
|
)
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
@ -1636,7 +1912,7 @@ async def _execute_tool(name: str, args: dict) -> dict:
|
|||||||
if name == "vision_analyze":
|
if name == "vision_analyze":
|
||||||
image_url = args.get("image_url","")
|
image_url = args.get("image_url","")
|
||||||
prompt = args.get("prompt","Beschrijf beknopt wat je ziet en noem de belangrijkste details.")
|
prompt = args.get("prompt","Beschrijf beknopt wat je ziet en noem de belangrijkste details.")
|
||||||
max_tokens = int(args.get("max_tokens",512))
|
max_tokens = int(args.get("max_tokens",1024))
|
||||||
b64 = None
|
b64 = None
|
||||||
# Alleen data: of raw base64 accepteren; http(s) niet, want die worden niet
|
# Alleen data: of raw base64 accepteren; http(s) niet, want die worden niet
|
||||||
# ingeladen in de vision-call en zouden stil falen.
|
# ingeladen in de vision-call en zouden stil falen.
|
||||||
@ -1908,6 +2184,52 @@ async def _complete_with_autocontinue(
|
|||||||
continues += 1
|
continues += 1
|
||||||
return content, resp
|
return content, resp
|
||||||
|
|
||||||
|
async def llm_call_autocont(
|
||||||
|
messages: list[dict],
|
||||||
|
*,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
stream: bool = False,
|
||||||
|
temperature: float = 0.2,
|
||||||
|
top_p: float = 0.9,
|
||||||
|
max_tokens: int = 1024,
|
||||||
|
extra: Optional[dict] = None,
|
||||||
|
stop: Optional[Union[str, list[str]]] = None,
|
||||||
|
**kwargs
|
||||||
|
) -> dict | StreamingResponse:
|
||||||
|
"""
|
||||||
|
OpenAI-compatibele wrapper die non-stream antwoorden automatisch
|
||||||
|
doorcontinueert met jouw _complete_with_autocontinue(...).
|
||||||
|
- stream=True -> passthrough naar llm_call_openai_compat (ongewijzigd)
|
||||||
|
- stream=False -> retourneert één samengevoegd antwoord als OpenAI JSON
|
||||||
|
"""
|
||||||
|
mdl = (model or os.getenv("LLM_MODEL", "mistral-medium")).strip()
|
||||||
|
if stream:
|
||||||
|
# streaming blijft 1-op-1 zoals voorheen
|
||||||
|
return await llm_call_openai_compat(
|
||||||
|
messages,
|
||||||
|
model=mdl,
|
||||||
|
stream=True,
|
||||||
|
temperature=temperature,
|
||||||
|
top_p=top_p,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
extra=extra if extra else None,
|
||||||
|
stop=stop
|
||||||
|
)
|
||||||
|
|
||||||
|
max_autocont = int(os.getenv("LLM_AUTO_CONTINUES", "2"))
|
||||||
|
full_text, _last = await _complete_with_autocontinue(
|
||||||
|
messages,
|
||||||
|
model=mdl,
|
||||||
|
temperature=temperature,
|
||||||
|
top_p=top_p,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
extra_payload=extra if extra else None,
|
||||||
|
max_autocont=max_autocont
|
||||||
|
)
|
||||||
|
# Bouw een standaard OpenAI-chat response met het samengevoegde antwoord
|
||||||
|
return _openai_chat_response(mdl, full_text, messages)
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
|
@app.post("/v1/chat/completions")
|
||||||
async def openai_chat_completions(body: dict = Body(...), request: Request = None):
|
async def openai_chat_completions(body: dict = Body(...), request: Request = None):
|
||||||
model = (body.get("model") or os.getenv("LLM_MODEL", "mistral-medium")).strip()
|
model = (body.get("model") or os.getenv("LLM_MODEL", "mistral-medium")).strip()
|
||||||
@ -2053,7 +2375,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
# jouw bestaande helper; hou 'm zoals je al gebruikt
|
# jouw bestaande helper; hou 'm zoals je al gebruikt
|
||||||
resp = await llm_call_openai_compat(ask, stream=False, max_tokens=512)
|
resp = await llm_call_openai_compat(ask, stream=False, max_tokens=512)
|
||||||
txt = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content", "") or ""
|
txt = ((resp.get("choices") or [{}])[0].get("message") or {}).get("content", "") or ""
|
||||||
calls = _extract_tool_calls_from_text(txt)
|
calls = detect_toolcalls_any(txt) #_extract_tool_calls_from_text(txt)
|
||||||
if calls:
|
if calls:
|
||||||
return {
|
return {
|
||||||
"id": f"chatcmpl-{uuid.uuid4().hex}",
|
"id": f"chatcmpl-{uuid.uuid4().hex}",
|
||||||
@ -2075,6 +2397,17 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
|
|
||||||
# Speciale modellen
|
# Speciale modellen
|
||||||
if model == "repo-agent":
|
if model == "repo-agent":
|
||||||
|
# === snelle bypass voor "unified diff" opdrachten met expliciete paden ===
|
||||||
|
try:
|
||||||
|
fast = await _fast_unified_diff_task(messages)
|
||||||
|
except Exception as e:
|
||||||
|
fast = f"(Fast-diff pad mislukte: {e})"
|
||||||
|
if fast:
|
||||||
|
if stream:
|
||||||
|
async def _emit(): yield ("data: " + json.dumps({"id": f"chatcmpl-{uuid.uuid4().hex[:12]}", "object":"chat.completion.chunk","created": int(time.time()),"model":"repo-agent","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":None}]}) + "\n\n").encode("utf-8"); yield ("data: " + json.dumps({"id": f"chatcmpl-{uuid.uuid4().hex[:12]}","object":"chat.completion.chunk","created": int(time.time()),"model":"repo-agent","choices":[{"index":0,"delta":{"content": fast},"finish_reason":None}]}) + "\n\n").encode("utf-8"); yield b"data: [DONE]\n\n"
|
||||||
|
return StreamingResponse(_emit(), media_type="text/event-stream", headers={"Cache-Control":"no-cache, no-transform","X-Accel-Buffering":"no","Connection":"keep-alive"})
|
||||||
|
return JSONResponse(_openai_chat_response("repo-agent", fast, messages))
|
||||||
|
|
||||||
if stream:
|
if stream:
|
||||||
async def event_gen():
|
async def event_gen():
|
||||||
import asyncio, time, json, contextlib
|
import asyncio, time, json, contextlib
|
||||||
@ -2421,7 +2754,7 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
if stream:
|
if stream:
|
||||||
temperature = float(body.get("temperature", 0.2))
|
temperature = float(body.get("temperature", 0.2))
|
||||||
top_p = float(body.get("top_p", 0.9))
|
top_p = float(body.get("top_p", 0.9))
|
||||||
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "1024"))
|
_default_max = int(os.getenv("LLM_DEFAULT_MAX_TOKENS", "13021"))
|
||||||
max_tokens = int(body.get("max_tokens", _default_max))
|
max_tokens = int(body.get("max_tokens", _default_max))
|
||||||
return await llm_call_openai_compat(
|
return await llm_call_openai_compat(
|
||||||
messages,
|
messages,
|
||||||
@ -3331,7 +3664,7 @@ async def rag_query_api(
|
|||||||
resp = await llm_call_openai_compat(
|
resp = await llm_call_openai_compat(
|
||||||
[{"role":"system","content":"You are precise and return only valid JSON."},
|
[{"role":"system","content":"You are precise and return only valid JSON."},
|
||||||
{"role":"user","content": prompt+"\n\nOnly JSON array."}],
|
{"role":"user","content": prompt+"\n\nOnly JSON array."}],
|
||||||
stream=False, temperature=0.0, top_p=1.0, max_tokens=256
|
stream=False, temperature=0.0, top_p=1.0, max_tokens=512
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]"))
|
order = json.loads((resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]"))
|
||||||
@ -3428,7 +3761,8 @@ initialize_agent(
|
|||||||
get_git_repo_fn=_get_git_repo_async,
|
get_git_repo_fn=_get_git_repo_async,
|
||||||
rag_index_repo_internal_fn=_rag_index_repo_internal,
|
rag_index_repo_internal_fn=_rag_index_repo_internal,
|
||||||
rag_query_internal_fn=_rag_query_internal,
|
rag_query_internal_fn=_rag_query_internal,
|
||||||
llm_call_fn=llm_call_openai_compat,
|
# Gebruik auto-continue wrapper zodat agent-antwoorden niet worden afgekapt
|
||||||
|
llm_call_fn=llm_call_autocont,
|
||||||
extract_code_block_fn=extract_code_block,
|
extract_code_block_fn=extract_code_block,
|
||||||
read_text_file_fn=_read_text_file_wrapper,
|
read_text_file_fn=_read_text_file_wrapper,
|
||||||
client_ip_fn=_client_ip,
|
client_ip_fn=_client_ip,
|
||||||
|
|||||||
131
llm_client.py
Executable file
131
llm_client.py
Executable file
@ -0,0 +1,131 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from queue_helper import QueueManager
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# -------------------------------------------------------------
|
||||||
|
# Config voor onderliggende LLM-backend
|
||||||
|
# -------------------------------------------------------------
|
||||||
|
# Dit is NIET jouw eigen /v1/chat/completions endpoint,
|
||||||
|
# maar de *echte* model-backend (bijv. Ollama, vLLM, Mistral server, etc.).
|
||||||
|
LLM_API_BASE = os.getenv("LLM_API_BASE", "http://127.0.0.1:11434")
|
||||||
|
LLM_DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
|
||||||
|
LLM_REQUEST_TIMEOUT = float(os.getenv("LLM_REQUEST_TIMEOUT", "120"))
|
||||||
|
|
||||||
|
# Deze wordt in app.py gezet via init_llm_client(...)
|
||||||
|
LLM_QUEUE: QueueManager | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def init_llm_client(queue: QueueManager) -> None:
|
||||||
|
"""
|
||||||
|
Koppel de globale LLM_QUEUE aan de QueueManager uit app.py.
|
||||||
|
Deze MOET je in app.py één keer aanroepen.
|
||||||
|
"""
|
||||||
|
global LLM_QUEUE
|
||||||
|
LLM_QUEUE = queue
|
||||||
|
logger.info("llm_client: LLM_QUEUE gekoppeld via init_llm_client.")
|
||||||
|
|
||||||
|
|
||||||
|
def _sync_model_infer(payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Synchronous call naar de echte LLM-backend.
|
||||||
|
Dit is de functie die je in app.py gebruikt bij het maken van de QueueManager.
|
||||||
|
"""
|
||||||
|
url = f"{LLM_API_BASE.rstrip('/')}/v1/chat/completions"
|
||||||
|
try:
|
||||||
|
with httpx.Client(timeout=LLM_REQUEST_TIMEOUT) as client:
|
||||||
|
resp = client.post(url, json=payload)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return resp.json()
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("LLM backend call failed: %s", exc)
|
||||||
|
return {
|
||||||
|
"object": "chat.completion",
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"finish_reason": "error",
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": f"[LLM-fout] {exc}",
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
"usage": {
|
||||||
|
"prompt_tokens": 0,
|
||||||
|
"completion_tokens": 0,
|
||||||
|
"total_tokens": 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _llm_call(
|
||||||
|
messages: List[Dict[str, str]],
|
||||||
|
*,
|
||||||
|
stream: bool = False,
|
||||||
|
temperature: float = 0.2,
|
||||||
|
top_p: float = 0.9,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
model: Optional[str] = None,
|
||||||
|
**extra: Any,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Centrale helper voor tools/agents/smart_rag/repo-agent.
|
||||||
|
|
||||||
|
Belangrijk:
|
||||||
|
- Gebruikt de *bestaande* QueueManager uit app.py (via init_llm_client).
|
||||||
|
- Stuurt jobs in de agent-queue (lagere prioriteit dan users).
|
||||||
|
- GEEN wachtrij-meldingen ("u bent #...") voor deze interne calls.
|
||||||
|
"""
|
||||||
|
if stream:
|
||||||
|
# In deze agent gebruiken we geen streaming.
|
||||||
|
raise NotImplementedError("_llm_call(stream=True) wordt momenteel niet ondersteund.")
|
||||||
|
|
||||||
|
if LLM_QUEUE is None:
|
||||||
|
# Hard fail: dan weet je meteen dat init_llm_client nog niet is aangeroepen.
|
||||||
|
raise RuntimeError("LLM_QUEUE is niet geïnitialiseerd. Roep init_llm_client(...) aan in app.py")
|
||||||
|
|
||||||
|
payload: Dict[str, Any] = {
|
||||||
|
"model": model or LLM_DEFAULT_MODEL,
|
||||||
|
"messages": messages,
|
||||||
|
"stream": False,
|
||||||
|
"temperature": float(temperature),
|
||||||
|
"top_p": float(top_p),
|
||||||
|
}
|
||||||
|
if max_tokens is not None:
|
||||||
|
payload["max_tokens"] = int(max_tokens)
|
||||||
|
|
||||||
|
payload.update(extra)
|
||||||
|
|
||||||
|
loop = asyncio.get_running_loop()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# request_agent_sync blokkeert → naar threadpool
|
||||||
|
response: Dict[str, Any] = await loop.run_in_executor(
|
||||||
|
None, lambda: LLM_QUEUE.request_agent_sync(payload)
|
||||||
|
)
|
||||||
|
return response
|
||||||
|
except Exception as exc:
|
||||||
|
logger.exception("_llm_call via agent-queue failed: %s", exc)
|
||||||
|
return {
|
||||||
|
"object": "chat.completion",
|
||||||
|
"choices": [{
|
||||||
|
"index": 0,
|
||||||
|
"finish_reason": "error",
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": f"[LLM-queue-fout] {exc}",
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
"usage": {
|
||||||
|
"prompt_tokens": 0,
|
||||||
|
"completion_tokens": 0,
|
||||||
|
"total_tokens": 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
1
mistral-api.sh
Executable file
1
mistral-api.sh
Executable file
@ -0,0 +1 @@
|
|||||||
|
docker run -d --name mistral-api --network host -v /opt/piper/voices:/voices:ro -e LLM_TOOL_RUNNER=bridge -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" -e LLM_MAX_CONCURRENCY=2 -e REPO_AGENT_SMART=1 -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz -e LLM_WINDOWING_ENABLE=1 -e LLM_CONTEXT_TOKENS=13021 -e LLM_RESPONSE_RESERVE=1024 -e LLM_AUTO_CONTINUES=2 -e LLM_FUNCTION_CALLING_MODE=shim -e RAG_EMB_WEIGHT=0.6 -e LLM_URL="http://localhost:8000/v1/chat/completions" -e NO_PROXY="127.0.0.1,localhost,::1,host.docker.internal" -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 -e LLM_READ_TIMEOUT=3600 -e NO_PROXY=localhost,127.0.0.1,::1 -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ mistral-api
|
||||||
@ -45,11 +45,20 @@ class QueueManager:
|
|||||||
self._worker.start()
|
self._worker.start()
|
||||||
|
|
||||||
# ---------- public API ----------
|
# ---------- public API ----------
|
||||||
def enqueue_user(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> tuple[str, int]:
|
def enqueue_user(
|
||||||
|
self,
|
||||||
|
payload: Dict,
|
||||||
|
progress_cb: Callable[[Dict], None],
|
||||||
|
*,
|
||||||
|
notify_position: bool = False,
|
||||||
|
) -> tuple[str, int]:
|
||||||
job = _Job(payload, progress_cb)
|
job = _Job(payload, progress_cb)
|
||||||
try: self._user_q.put_nowait(job)
|
try: self._user_q.put_nowait(job)
|
||||||
except queue.Full: raise RuntimeError(f"User‑queue vol (≥{USER_MAX_QUEUE})")
|
except queue.Full: raise RuntimeError(f"User‑queue vol (≥{USER_MAX_QUEUE})")
|
||||||
position = self._user_q.qsize()
|
position = self._user_q.qsize()
|
||||||
|
if notify_position:
|
||||||
|
# start een aparte notifier-thread die periodiek de wachtrijpositie meldt
|
||||||
|
start_position_notifier(job, self._user_q)
|
||||||
return job.job_id, position
|
return job.job_id, position
|
||||||
|
|
||||||
def enqueue_agent(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> str:
|
def enqueue_agent(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> str:
|
||||||
@ -58,6 +67,32 @@ class QueueManager:
|
|||||||
except queue.Full: raise RuntimeError(f"Agent‑queue vol (≥{AGENT_MAX_QUEUE})")
|
except queue.Full: raise RuntimeError(f"Agent‑queue vol (≥{AGENT_MAX_QUEUE})")
|
||||||
return job.job_id
|
return job.job_id
|
||||||
|
|
||||||
|
# ---------- sync helper voor agents/tools ----------
|
||||||
|
def request_agent_sync(self, payload: Dict, timeout: float = WORKER_TIMEOUT) -> Dict:
|
||||||
|
"""
|
||||||
|
Gebruik dit voor interne calls (agents/tools).
|
||||||
|
- Job wordt in de agent-queue gezet (lagere prioriteit dan users).
|
||||||
|
- We wachten blokkerend tot de worker klaar is of tot timeout.
|
||||||
|
- Er worden GEEN wachtrij-meldingen ("U bent #...") verstuurd.
|
||||||
|
"""
|
||||||
|
result_box: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
def _cb(msg: Dict):
|
||||||
|
# alleen het eindresultaat is interessant voor tools/agents
|
||||||
|
result_box["answer"] = msg
|
||||||
|
|
||||||
|
job = _Job(payload, _cb)
|
||||||
|
try:
|
||||||
|
self._agent_q.put_nowait(job)
|
||||||
|
except queue.Full:
|
||||||
|
raise RuntimeError(f"Agent-queue vol (≥{AGENT_MAX_QUEUE})")
|
||||||
|
|
||||||
|
ok = job.event.wait(timeout)
|
||||||
|
if not ok:
|
||||||
|
raise TimeoutError(f"LLM-inference duurde langer dan {timeout} seconden.")
|
||||||
|
if job.error:
|
||||||
|
raise RuntimeError(job.error)
|
||||||
|
return result_box.get("answer") or {}
|
||||||
# ---------- worker ----------
|
# ---------- worker ----------
|
||||||
def _run_worker(self):
|
def _run_worker(self):
|
||||||
while not self._shutdown.is_set():
|
while not self._shutdown.is_set():
|
||||||
@ -79,18 +114,24 @@ class QueueManager:
|
|||||||
self._shutdown.set()
|
self._shutdown.set()
|
||||||
self._worker.join(timeout=5)
|
self._worker.join(timeout=5)
|
||||||
|
|
||||||
def start_position_notifier(job: _Job,
|
def start_position_notifier(
|
||||||
|
job: _Job,
|
||||||
queue_ref: queue.Queue,
|
queue_ref: queue.Queue,
|
||||||
interval: float = UPDATE_INTERVAL):
|
interval: float = UPDATE_INTERVAL,
|
||||||
|
):
|
||||||
"""Stuurt elke `interval` seconden een bericht met de huidige positie."""
|
"""Stuurt elke `interval` seconden een bericht met de huidige positie."""
|
||||||
def _notifier():
|
def _notifier():
|
||||||
while not job.event.is_set():
|
# Stop zodra het job-event wordt gezet (success/fout/timeout upstream)
|
||||||
|
while not job.event.wait(interval):
|
||||||
|
# Neem een snapshot van de queue-inhoud op een thread-safe manier
|
||||||
|
with queue_ref.mutex:
|
||||||
|
snapshot = list(queue_ref.queue)
|
||||||
try:
|
try:
|
||||||
pos = list(queue_ref.queue).index(job) + 1 # 1‑based
|
pos = snapshot.index(job) + 1 # 1-based
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
# Job staat niet meer in de wachtrij → geen updates meer nodig
|
||||||
break
|
break
|
||||||
job.callback({"info": f"U bent #{pos} in de wachtrij. Even geduld…" })
|
job.callback({"info": f"U bent #{pos} in de wachtrij. Even geduld…" })
|
||||||
time.sleep(interval)
|
|
||||||
t = threading.Thread(target=_notifier, daemon=True)
|
t = threading.Thread(target=_notifier, daemon=True)
|
||||||
t.start()
|
t.start()
|
||||||
return t
|
return t
|
||||||
|
|||||||
10
smart_rag.py
10
smart_rag.py
@ -296,7 +296,7 @@ async def hybrid_retrieve(
|
|||||||
# Optionele query-routing + RRF
|
# Optionele query-routing + RRF
|
||||||
use_route = str(os.getenv("RAG_ROUTE", "1")).lower() not in ("0", "false")
|
use_route = str(os.getenv("RAG_ROUTE", "1")).lower() not in ("0", "false")
|
||||||
use_rrf = str(os.getenv("RAG_RRF", "1")).lower() not in ("0", "false")
|
use_rrf = str(os.getenv("RAG_RRF", "1")).lower() not in ("0", "false")
|
||||||
# Optionele mini multi-query expansion (default uit)
|
# Optionele mini multi-query expansion (default aan)
|
||||||
use_expand = str(os.getenv("RAG_MULTI_EXPAND", "1")).lower() in ("1","true","yes")
|
use_expand = str(os.getenv("RAG_MULTI_EXPAND", "1")).lower() in ("1","true","yes")
|
||||||
k_variants = max(1, int(os.getenv("RAG_MULTI_K", "3")))
|
k_variants = max(1, int(os.getenv("RAG_MULTI_K", "3")))
|
||||||
per_query_k = max(1, int(per_query_k))
|
per_query_k = max(1, int(per_query_k))
|
||||||
@ -317,10 +317,14 @@ async def hybrid_retrieve(
|
|||||||
if use_route:
|
if use_route:
|
||||||
buckets = _route_query_buckets(qv)
|
buckets = _route_query_buckets(qv)
|
||||||
for b in buckets:
|
for b in buckets:
|
||||||
|
# combineer globale path_contains-hint met bucket-specifieke filter
|
||||||
|
pc = b.get("path_contains")
|
||||||
|
if path_contains and not pc:
|
||||||
|
pc = path_contains
|
||||||
res = await rag_query_internal_fn(
|
res = await rag_query_internal_fn(
|
||||||
query=b["q"], n_results=per_query_k,
|
query=b["q"], n_results=per_query_k,
|
||||||
collection_name=collection_name,
|
collection_name=collection_name,
|
||||||
repo=repo, path_contains=b["path_contains"], profile=profile
|
repo=repo, path_contains=pc, profile=profile
|
||||||
)
|
)
|
||||||
lst = []
|
lst = []
|
||||||
for item in (res or {}).get("results", []):
|
for item in (res or {}).get("results", []):
|
||||||
@ -594,5 +598,7 @@ __all__ = [
|
|||||||
"expand_queries",
|
"expand_queries",
|
||||||
"hybrid_retrieve",
|
"hybrid_retrieve",
|
||||||
"assemble_context",
|
"assemble_context",
|
||||||
|
"_laravel_pairs_from_route_text",
|
||||||
|
"_laravel_guess_view_paths_from_text",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
# windowing_utils.py
|
# windowing_utils.py
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import List, Dict, Callable, Optional, Tuple
|
from typing import List, Dict, Callable, Optional, Tuple, Awaitable
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
@ -64,7 +64,7 @@ class ConversationWindow:
|
|||||||
async def build_within_budget(
|
async def build_within_budget(
|
||||||
self,
|
self,
|
||||||
system_prompt: Optional[str],
|
system_prompt: Optional[str],
|
||||||
summarizer: Optional[Callable[[str, List[Dict]], "awaitable[str]"]] = None
|
summarizer: Optional[Callable[[str, List[Dict]], Awaitable[str]]] = None
|
||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
budget = self.max_ctx_tokens - max(1, self.response_reserve)
|
budget = self.max_ctx_tokens - max(1, self.response_reserve)
|
||||||
working = self.history[:]
|
working = self.history[:]
|
||||||
@ -106,6 +106,11 @@ class ConversationWindow:
|
|||||||
self.running_summary = await summarizer(self.running_summary, chunk_buf)
|
self.running_summary = await summarizer(self.running_summary, chunk_buf)
|
||||||
chunk_buf = []
|
chunk_buf = []
|
||||||
|
|
||||||
|
# verwerk eventuele overgebleven buffer zodat er geen turns verdwijnen
|
||||||
|
if chunk_buf:
|
||||||
|
self.running_summary = await summarizer(self.running_summary, chunk_buf)
|
||||||
|
chunk_buf = []
|
||||||
|
|
||||||
self.history = working
|
self.history = working
|
||||||
return await build_candidate(self.running_summary, working)
|
return await build_candidate(self.running_summary, working)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user