first commit

This commit is contained in:
admin 2026-02-23 15:59:34 +01:00
commit cbfddf128e
16 changed files with 12018 additions and 0 deletions

108
Dockerfile Normal file
View File

@ -0,0 +1,108 @@
# syntax=docker/dockerfile:1
# ===== Base: CUDA 11.8 + cuDNN + conda (PyTorch runtime image) =====
FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime

WORKDIR /app

# Make sure conda's libs are always found first
ENV LD_LIBRARY_PATH=/opt/conda/lib:${LD_LIBRARY_PATH}
# P5000 = Pascal SM 6.1; needed for (possible) on-the-fly CUDA builds
ENV TORCH_CUDA_ARCH_LIST="6.1"

# ===== Model caches at fixed paths =====
# (STT_MODEL is configured exactly once below via ARG; it used to be set
#  here as well, which was redundant.)
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf \
    SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache

# Build-time knobs for which models to prefetch
ARG RAG_EMBEDDINGS=gte-multilingual
ARG STT_MODEL_ARG=small
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS}
ENV STT_MODEL=${STT_MODEL_ARG}

# Cache directories, world-readable so any runtime user can read the models
RUN mkdir -p /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper && \
    chmod -R a+rX /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper

# ===== System packages in ONE layer =====
# (was: two overlapping apt-get layers that both installed git/curl/
#  build-essential, with pkg-config listed twice in the same line)
# - build tooling + FFmpeg dev headers so pip can build PyAV
#   (pulled in by faster-whisper) against them
# - cairo/pango/gdk-pixbuf runtime libs for the cairosvg stack
RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
    apt-get install -y --no-install-recommends \
        apt-utils \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libavcodec-dev \
        libavdevice-dev \
        libavfilter-dev \
        libavformat-dev \
        libavutil-dev \
        libcairo2 \
        libgdk-pixbuf2.0-0 \
        libpango-1.0-0 \
        libswresample-dev \
        libswscale-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# FFmpeg via conda-forge for recent headers/libs; faster-whisper==1.0.0 pulls
# av==11.*, which builds cleanly against conda's FFmpeg 6.
RUN conda config --system --set channel_priority flexible \
    && conda install -y -c conda-forge "ffmpeg>=6,<8" \
    && conda clean -afy

# ===== Python deps =====
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Extra packages (note: av must come from the FFmpeg toolchain above, not pip)
RUN pip install --no-cache-dir \
    PyPDF2 python-multipart gitpython chromadb httpx meilisearch \
    pandas openpyxl python-pptx faster-whisper==1.0.0 \
    cairosvg sentence-transformers rank-bm25

# ===== Prefetch models =====
# 1) SentenceTransformers embedder (selection driven by RAG_EMBEDDINGS)
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer
mapping = {
    "gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
    "bge-small": "BAAI/bge-small-en-v1.5",
    "e5-small": "intfloat/e5-small-v2",
    "gte-base-en": "thenlper/gte-base",
}
choice = os.environ.get("RAG_EMBEDDINGS", "gte-multilingual").lower()
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
cache_root = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
local_dir = os.path.join(cache_root, "embedder")
os.makedirs(cache_root, exist_ok=True)
print("Downloading SentenceTransformer:", hf_id)
model = SentenceTransformer(hf_id, cache_folder=cache_root, device="cpu")  # download only
model.save(local_dir)
print("Prefetched SentenceTransformer:", hf_id)
PY

# 2) faster-whisper (prefetch CPU variant; the runtime can pick its device)
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel
name = os.environ.get("STT_MODEL", "small")
cache_root = os.path.join(os.environ.get("XDG_CACHE_HOME", "/opt/cache"), "whisper")
os.makedirs(cache_root, exist_ok=True)
_ = WhisperModel(name, device="cpu", compute_type="int8", download_root=cache_root)
print("Prefetched faster-whisper:", name, "->", cache_root)
PY

# (optional) piper TTS prefetch intentionally skipped; can be added later

# ===== App code (copied last so code edits don't bust the model-cache layers) =====
COPY app.py queue_helper.py agent_repo.py windowing_utils.py smart_rag.py \
     llm_client.py web_search.py ./

EXPOSE 8080
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]

33
README.md Normal file
View File

@ -0,0 +1,33 @@
# Toolserver repo toolset v2
Deze patch voegt een duidelijkere repo-toolset toe, met een macro-tool die het "pas repo aan en push naar test-branch" proces in 1 toolcall doet.
## Nieuwe (aanbevolen) tools
- `repo_open(repo_url, branch="main") -> {workspace_id,...}`
- `repo_search(query, mode="auto|grep|rag", workspace_id? / repo_url?, n_results=20, ...)`
- `repo_read(path, workspace_id? / repo_url?, start_line?, end_line?)`
- `repo_apply(workspace_id? / repo_url?, patch_b64|patch|files, dry_run=false)`
- `repo_push(workspace_id? / repo_url?, base_branch="main", new_branch?, branch_prefix="test", commit_message=...)`
- `repo_pr_create(repo_url, head_branch, base_branch="main", title, body?)`
- `repo_change_to_branch(repo_url, base_branch="main", patch_b64|patch|files, new_branch?/branch_prefix, commit_message, create_pr?, pr_title?, pr_body?)`
## Deprecated (blijft werken)
- `repo_grep` -> alias naar `repo_search(mode="grep")`
- `rag_index_repo` -> meestal niet meer nodig; `repo_search` indexeert automatisch
- `rag_query` -> gebruik `repo_search(mode="rag")`
## Belangrijke env vars
- `LLM_PROXY_URL=http://192.168.100.1:8081/v1/completions` (of `/v1/chat/completions` als je proxy dat biedt)
- `GITEA_TOKEN=...` (alleen nodig voor PR create, en optioneel voor push auth als je repo_url geen creds bevat)
- `GITEA_USER=oauth2` (optioneel; default is oauth2 bij token-auth)
- `ALLOWED_GIT_HOSTS=10.25.138.40,192.168.100.1` (aanrader; leeg = alles toegestaan)
## Aanbevolen flow voor LLM
1) `repo_search(mode="auto")` om relevante bestanden te vinden
2) `repo_read` om de files te lezen
3) (LLM maakt patch)
4) `repo_change_to_branch` met `patch_b64` om te pushen naar een test-branch (+ evt PR)

33
README_TOOLSET_V2.md Normal file
View File

@ -0,0 +1,33 @@
# Toolserver repo toolset v2
Deze patch voegt een duidelijkere repo-toolset toe, met een macro-tool die het "pas repo aan en push naar test-branch" proces in 1 toolcall doet.
## Nieuwe (aanbevolen) tools
- `repo_open(repo_url, branch="main") -> {workspace_id,...}`
- `repo_search(query, mode="auto|grep|rag", workspace_id? / repo_url?, n_results=20, ...)`
- `repo_read(path, workspace_id? / repo_url?, start_line?, end_line?)`
- `repo_apply(workspace_id? / repo_url?, patch_b64|patch|files, dry_run=false)`
- `repo_push(workspace_id? / repo_url?, base_branch="main", new_branch?, branch_prefix="test", commit_message=...)`
- `repo_pr_create(repo_url, head_branch, base_branch="main", title, body?)`
- `repo_change_to_branch(repo_url, base_branch="main", patch_b64|patch|files, new_branch?/branch_prefix, commit_message, create_pr?, pr_title?, pr_body?)`
## Deprecated (blijft werken)
- `repo_grep` -> alias naar `repo_search(mode="grep")`
- `rag_index_repo` -> meestal niet meer nodig; `repo_search` indexeert automatisch
- `rag_query` -> gebruik `repo_search(mode="rag")`
## Belangrijke env vars
- `LLM_PROXY_URL=http://192.168.100.1:8081/v1/completions` (of `/v1/chat/completions` als je proxy dat biedt)
- `GITEA_TOKEN=...` (alleen nodig voor PR create, en optioneel voor push auth als je repo_url geen creds bevat)
- `GITEA_USER=oauth2` (optioneel; default is oauth2 bij token-auth)
- `ALLOWED_GIT_HOSTS=10.25.138.40,192.168.100.1` (aanrader; leeg = alles toegestaan)
## Aanbevolen flow voor LLM
1) `repo_search(mode="auto")` om relevante bestanden te vinden
2) `repo_read` om de files te lezen
3) (LLM maakt patch)
4) `repo_change_to_branch` met `patch_b64` om te pushen naar een test-branch (+ evt PR)

26
README_repo_push.txt Normal file
View File

@ -0,0 +1,26 @@
Toolserver patch + repo_push tool
Dit zipje bevat:
- app.py (toolserver-only + repo_push)
- llm_client.py
Nieuwe tool: repo_push
- Maakt een nieuwe branch (default: test/<timestamp>-<slug>) vanaf base_branch
- Past een unified diff toe (git apply)
- Commit + push naar origin
- Optioneel: create_pr (best-effort) via Gitea API
Belangrijke env vars:
- LLM_PROXY_URL="http://192.168.100.1:8081/v1/completions" (voor tools die LLM nodig hebben)
- ALLOWED_GIT_HOSTS="192.168.100.1,localhost,127.0.0.1" (veiligheidscheck; pas aan indien nodig)
Voor push-auth (HTTP):
- GITEA_USER="..." (of GIT_HTTP_USER)
- GITEA_TOKEN="..." (of GIT_HTTP_TOKEN)
Git identity voor commits:
- GIT_AUTHOR_NAME="toolserver"
- GIT_AUTHOR_EMAIL="toolserver@local"
Gebruik:
- tools worden automatisch beschikbaar via /openapi/repo_push en /v1/tools.

View File

@ -0,0 +1,19 @@
Toolserver patch
Wat verandert dit?
- app.py draait nu standaard als TOOLSERVER_ONLY=1:
* expose: /openapi.json + /openapi/* + /v1/tools + /v1/tools/call + /healthz + /metrics
* verwijdert alle overige routes (zoals /v1/chat/completions) uit de FastAPI router.
- Alle LLM-calls die tools/agents doen, gaan via llm_client -> QueueManager -> LLM_PROXY_URL.
Config:
export TOOLSERVER_ONLY=1
export LLM_PROXY_URL="http://192.168.100.1:8081/v1/completions"
export LLM_MODEL="mistral-medium" # of wat je proxy verwacht
Startvoorbeeld:
uvicorn app:app --host 0.0.0.0 --port 8080
Opmerking:
- Interne LLM streaming is uitgezet (tools zijn non-stream).
Als je later streaming nodig hebt voor agents, dan moeten we llm_client uitbreiden.

4887
agent_repo.py Normal file

File diff suppressed because it is too large Load Diff

5512
app.py Normal file

File diff suppressed because it is too large Load Diff

141
llm_client.py Normal file
View File

@ -0,0 +1,141 @@
from __future__ import annotations
import os
import asyncio
import logging
from typing import List, Dict, Any, Optional
import httpx
from queue_helper import QueueManager
logger = logging.getLogger(__name__)
# -------------------------------------------------------------
# Config for the underlying LLM backend / proxy
# -------------------------------------------------------------
# Set one of these:
# - LLM_PROXY_URL: full URL to an OpenAI-compatible endpoint
#   (e.g. http://host:8081/v1/completions or /v1/chat/completions)
# - LLM_API_BASE : base URL (fallback); /v1/chat/completions is appended
LLM_PROXY_URL = (os.getenv("LLM_PROXY_URL") or "").strip()
LLM_API_BASE = os.getenv("LLM_API_BASE", "").strip() or "http://127.0.0.1:11434"
LLM_DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")  # model name forwarded to the backend
LLM_REQUEST_TIMEOUT = float(os.getenv("LLM_REQUEST_TIMEOUT", "180"))  # seconds per HTTP request

# Shared QueueManager; assigned from app.py via init_llm_client(...)
LLM_QUEUE: QueueManager | None = None


def init_llm_client(queue: QueueManager) -> None:
    """Attach the app-wide QueueManager so _llm_call can dispatch through it."""
    global LLM_QUEUE
    LLM_QUEUE = queue
    logger.info("llm_client: LLM_QUEUE gekoppeld via init_llm_client.")
def _resolve_llm_url() -> str:
    """Pick the backend URL: the explicit proxy when configured, otherwise
    the chat-completions endpoint derived from the API base."""
    if not LLM_PROXY_URL:
        # fallback: base URL -> OpenAI-compatible chat endpoint
        return f"{LLM_API_BASE.rstrip('/')}/v1/chat/completions"
    return LLM_PROXY_URL.rstrip("/")
def _messages_to_prompt(messages: List[Dict[str, Any]]) -> str:
# eenvoudige, robuuste prompt-serialisatie voor /v1/completions proxies
parts: list[str] = []
for m in messages:
role = (m.get("role") or "user").upper()
content = m.get("content") or ""
parts.append(f"{role}: {content}")
parts.append("ASSISTANT:")
return "\n".join(parts)
def _chat_from_text(text: str) -> Dict[str, Any]:
return {
"object": "chat.completion",
"choices": [{
"index": 0,
"finish_reason": "stop",
"message": {"role": "assistant", "content": text},
}],
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
}
def _sync_model_infer(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Blocking HTTP call to the configured LLM backend.

    Accepts an OpenAI-style chat payload. If the resolved URL looks like a
    /chat/completions endpoint the payload is forwarded unchanged; otherwise
    it is converted to a legacy /v1/completions request and the response is
    normalized back into a chat.completion envelope. Any failure is returned
    as a synthetic chat response (never raised), so queue workers don't die.
    """
    url = _resolve_llm_url()
    try:
        with httpx.Client(timeout=LLM_REQUEST_TIMEOUT) as client:
            # Detect endpoint type from the URL shape.
            is_chat = "/chat/" in url or url.endswith("/chat/completions")
            if is_chat:
                resp = client.post(url, json=payload)
                resp.raise_for_status()
                return resp.json()
            # /v1/completions style: flatten messages into a single prompt.
            messages = payload.get("messages") or []
            prompt = payload.get("prompt")
            if not prompt:
                prompt = _messages_to_prompt(messages)
            comp_payload: Dict[str, Any] = {
                "model": payload.get("model") or LLM_DEFAULT_MODEL,
                "prompt": prompt,
                "max_tokens": payload.get("max_tokens", 2048),
                "temperature": payload.get("temperature", 0.2),
                "top_p": payload.get("top_p", 0.9),
                "stream": False,
            }
            # Pass through extra sampling options when present.
            for k in ("stop", "presence_penalty", "frequency_penalty"):
                if k in payload:
                    comp_payload[k] = payload[k]
            resp = client.post(url, json=comp_payload)
            resp.raise_for_status()
            data = resp.json()
            # Normalize the completions response to a chat.completion dict.
            try:
                choice = (data.get("choices") or [{}])[0]
                txt = choice.get("text") or choice.get("message", {}).get("content") or ""
                return _chat_from_text(txt)
            except Exception:
                # Defensive: unexpected response shape -> truncated repr.
                return _chat_from_text(str(data)[:2000])
    except Exception as exc:
        logger.exception("LLM backend call failed: %s", exc)
        return _chat_from_text(f"[LLM-fout] {exc}")
async def _llm_call(
    messages: List[Dict[str, str]],
    *,
    stream: bool = False,
    temperature: float = 0.2,
    top_p: float = 0.9,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None,
    **extra: Any,
) -> Dict[str, Any]:
    """Run one chat completion through the shared agent queue.

    Builds an OpenAI-style payload and hands it to LLM_QUEUE inside a thread
    executor (the queue API is blocking). Streaming is not supported; queue
    failures are reported as a synthetic chat response instead of raising.
    """
    if stream:
        raise NotImplementedError("_llm_call(stream=True) wordt momenteel niet ondersteund.")
    if LLM_QUEUE is None:
        raise RuntimeError("LLM_QUEUE is niet geïnitialiseerd. Roep init_llm_client(...) aan in app.py")

    payload: Dict[str, Any] = {
        "model": model or LLM_DEFAULT_MODEL,
        "messages": messages,
        "stream": False,  # queue path is always non-streaming
        "temperature": float(temperature),
        "top_p": float(top_p),
    }
    if max_tokens is not None:
        payload["max_tokens"] = int(max_tokens)
    # caller-supplied extras win over the defaults above
    payload.update(extra)

    def _dispatch() -> Dict[str, Any]:
        return LLM_QUEUE.request_agent_sync(payload)

    loop = asyncio.get_running_loop()
    try:
        return await loop.run_in_executor(None, _dispatch)
    except Exception as exc:
        logger.exception("_llm_call via agent-queue failed: %s", exc)
        return _chat_from_text(f"[LLM-queue-fout] {exc}")

137
queue_helper.py Normal file
View File

@ -0,0 +1,137 @@
# -------------------------------------------------------------
# queue_helper.py - minimalistic thread-based queue manager
# -------------------------------------------------------------
import threading, queue, uuid, time
from typing import Callable, Any, Dict

# ------------------------------------------------------------------
# Configuration - adjust if needed
# ------------------------------------------------------------------
USER_MAX_QUEUE = 20     # max. number of waiting interactive user jobs
AGENT_MAX_QUEUE = 50    # max. number of waiting agent jobs (silent, no updates)
UPDATE_INTERVAL = 10.0  # seconds between "queue position" update messages
WORKER_TIMEOUT = 30.0   # max. seconds a single inference may take

# ------------------------------------------------------------------
class _Job:
__slots__ = ("job_id","payload","callback","created_at","event","result","error")
def __init__(self, payload: Dict, callback: Callable[[Dict], None]):
self.job_id = str(uuid.uuid4())
self.payload = payload
self.callback = callback
self.created_at = time.time()
self.event = threading.Event()
self.result = None
self.error = None
def set_success(self, answer: Dict):
self.result = answer
self.event.set()
self.callback(answer)
def set_error(self, exc: Exception):
self.error = str(exc)
self.event.set()
self.callback({"error": self.error})
class QueueManager:
    """Single shared worker thread draining two priority queues.

    Interactive user jobs are always served before background agent jobs;
    one worker calls `model_infer_fn`, so the underlying model is never
    invoked concurrently.
    """

    def __init__(self, model_infer_fn: Callable[[Dict], Dict]):
        # model_infer_fn: blocking function, payload dict -> answer dict
        self._infer_fn = model_infer_fn
        self._user_q = queue.Queue(maxsize=USER_MAX_QUEUE)
        self._agent_q = queue.Queue(maxsize=AGENT_MAX_QUEUE)
        self._shutdown = threading.Event()
        self._worker = threading.Thread(target=self._run_worker,
                                        daemon=True,
                                        name="LLMworker")
        self._worker.start()

    # ---------- public API ----------
    def enqueue_user(
        self,
        payload: Dict,
        progress_cb: Callable[[Dict], None],
        *,
        notify_position: bool = False,
    ) -> tuple[str, int]:
        """Queue an interactive user job.

        Returns (job_id, queue position at enqueue time); raises RuntimeError
        when the user queue is full. With notify_position=True a background
        thread periodically reports the job's position via progress_cb.
        """
        job = _Job(payload, progress_cb)
        try:
            self._user_q.put_nowait(job)
        except queue.Full:
            raise RuntimeError(f"Userqueue vol (≥{USER_MAX_QUEUE})")
        position = self._user_q.qsize()  # snapshot; may shift as jobs complete
        if notify_position:
            # separate notifier thread reports the queue position periodically
            start_position_notifier(job, self._user_q)
        return job.job_id, position

    def enqueue_agent(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> str:
        """Queue a background agent job (lower priority). Returns the job id."""
        job = _Job(payload, progress_cb)
        try:
            self._agent_q.put_nowait(job)
        except queue.Full:
            raise RuntimeError(f"Agentqueue vol (≥{AGENT_MAX_QUEUE})")
        return job.job_id

    # ---------- sync helper for agents/tools ----------
    def request_agent_sync(self, payload: Dict, timeout: float = WORKER_TIMEOUT) -> Dict:
        """Blocking helper for internal calls (agents/tools).

        - The job enters the agent queue (lower priority than users).
        - Blocks until the worker finishes or `timeout` elapses.
        - No "you are #N in the queue" notifications are sent.

        Bugfix: the result is now read from `job.result` — which the worker
        stores BEFORE setting the completion event — instead of from a
        callback side effect that could still be pending when `event.wait()`
        returned (race condition yielding an empty result).
        """
        job = _Job(payload, lambda _msg: None)  # tools need no progress updates
        try:
            self._agent_q.put_nowait(job)
        except queue.Full:
            raise RuntimeError(f"Agent-queue vol (≥{AGENT_MAX_QUEUE})")
        if not job.event.wait(timeout):
            raise TimeoutError(f"LLM-inference duurde langer dan {timeout} seconden.")
        if job.error:
            raise RuntimeError(job.error)
        return job.result or {}

    # ---------- worker ----------
    def _run_worker(self):
        """Drain loop: prefer user jobs, fall back to agent jobs, idle briefly."""
        while not self._shutdown.is_set():
            job = self._pop_job(self._user_q) or self._pop_job(self._agent_q)
            if not job:
                time.sleep(0.1)  # nothing queued; avoid busy spinning
                continue
            try:
                answer = self._infer_fn(job.payload)
                job.set_success(answer)
            except Exception as exc:
                job.set_error(exc)

    def _pop_job(self, q: queue.Queue):
        """Non-blocking pop; returns None when the queue is empty."""
        try:
            return q.get_nowait()
        except queue.Empty:
            return None

    def stop(self):
        """Signal the worker to exit and wait briefly for it to finish."""
        self._shutdown.set()
        self._worker.join(timeout=5)
def start_position_notifier(
    job: _Job,
    queue_ref: queue.Queue,
    interval: float = UPDATE_INTERVAL,
):
    """Spawn a daemon thread reporting the job's queue position every `interval` seconds.

    Stops as soon as the job's completion event is set or the job is no
    longer present in the queue. Returns the notifier thread.
    """
    def _poll():
        # event.wait doubles as the sleep; a set event means the job finished
        while not job.event.wait(interval):
            # thread-safe snapshot of the pending jobs
            with queue_ref.mutex:
                pending = list(queue_ref.queue)
            if job not in pending:
                # job left the queue -> no more updates needed
                break
            job.callback({"info": f"U bent #{pending.index(job) + 1} in de wachtrij. Even geduld…" })

    watcher = threading.Thread(target=_poll, daemon=True)
    watcher.start()
    return watcher

7
rebuild.sh Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Rebuild the toolserver image behind the local HTTP(S) proxy.
# Fix: the executable script had no shebang; also fail fast on real errors
# while tolerating a missing container on stop/rm.
set -euo pipefail

PROXY=http://192.168.100.2:8118
export HTTP_PROXY="$PROXY" HTTPS_PROXY="$PROXY" http_proxy="$PROXY" https_proxy="$PROXY"

# Stop/remove any previous container; ignore errors if it does not exist.
docker stop toolserver 2>/dev/null || true
docker rm -f toolserver 2>/dev/null || true

docker build -t toolserver . \
  --build-arg http_proxy="$PROXY" \
  --build-arg https_proxy="$PROXY"

5
requirements.txt Normal file
View File

@ -0,0 +1,5 @@
fastapi
uvicorn[standard]
requests
python-docx
pypdf

604
smart_rag.py Normal file
View File

@ -0,0 +1,604 @@
# smart_rag.py
# Kleine util-laag voor intent + hybride retrieval + context-assemblage.
from __future__ import annotations
import os, re, json, math, hashlib
from typing import List, Dict, Tuple, DefaultDict, Optional
from collections import defaultdict
def _decamel(s: str) -> str:
s = re.sub(r"([a-z])([A-Z])", r"\1 \2", s)
s = s.replace("_", " ")
return re.sub(r"\s+", " ", s).strip()
def _symbol_guess(q: str) -> list[str]:
# pak langste 'code-achtig' token als symboolkandidaat
toks = re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", q)
toks.sort(key=len, reverse=True)
return toks[:2]
def _simple_variants(q: str, max_k: int = 3) -> list[str]:
    """Cheap, LLM-free query expansion: lowercased, de-camelized and symbol variants."""
    variants = [q]

    def _add(candidate: str) -> None:
        # keep insertion order, skip empties and exact duplicates
        if candidate and candidate not in variants:
            variants.append(candidate)

    lowered = q.lower().strip()
    _add(lowered)
    decamelized = _decamel(q)
    if decamelized.lower() != lowered:
        _add(decamelized)
    for sym in _symbol_guess(q):
        _add(sym.replace("_", " "))  # spaced form of the symbol
        _add(sym)                    # raw symbol
    # cap at max_k entries, but always keep at least the original query
    return variants[: max(1, min(len(variants), max_k))]
# --- Query routing + RRF fuse ---
def _route_query_buckets(q: str) -> list[dict]:
"""Hele lichte router: retourneert lijst subqueries met optionele path filters en boost."""
lo = (q or "").lower()
buckets = []
# Queue/Jobs/Event pipeline (Laravel)
if any(w in lo for w in ["job", "queue", "listener", "event", "dispatch"]):
buckets.append({"q": q, "path_contains": "app/Jobs", "boost": 1.18})
buckets.append({"q": q, "path_contains": "app/Listeners", "boost": 1.12})
buckets.append({"q": q, "path_contains": "app/Events", "boost": 1.10})
# Models / Migrations
if any(w in lo for w in ["model", "eloquent", "scope", "attribute"]):
buckets.append({"q": q, "path_contains": "app/Models", "boost": 1.12})
if any(w in lo for w in ["migration", "schema", "table", "column"]):
buckets.append({"q": q, "path_contains": "database/migrations", "boost": 1.08})
# Laravel/Blade/UI
if any(w in lo for w in ["blade", "view", "template", "button", "placeholder", "label"]):
buckets.append({"q": q, "path_contains": "resources/views", "boost": 1.2})
# Routes/controllers
if any(w in lo for w in ["route", "controller", "middleware", "api", "web.php", "controller@"]):
buckets.append({"q": q, "path_contains": "routes", "boost": 1.15})
buckets.append({"q": q, "path_contains": "app/Http/Controllers", "boost": 1.2})
# Config/ENV
if any(w in lo for w in ["env", "config", "database", "queue", "cache"]):
buckets.append({"q": q, "path_contains": "config", "boost": 1.15})
buckets.append({"q": q, "path_contains": ".env", "boost": 1.1})
# Docs/README
if any(w in lo for w in ["readme", "install", "setup", "document", "usage"]):
buckets.append({"q": q, "path_contains": "README", "boost": 1.05})
buckets.append({"q": q, "path_contains": "docs", "boost": 1.05})
# Fallback: generiek
buckets.append({"q": q, "path_contains": None, "boost": 1.0})
# dedup op (q, path_contains)
seen = set(); out = []
for b in buckets:
key = (b["q"], b["path_contains"])
if key in seen: continue
seen.add(key); out.append(b)
return out
def rrf_fuse_ranked_lists(ranked_lists: list[list[dict]], k: int = 60) -> list[dict]:
    """Reciprocal-rank-fuse several per-channel rankings into a single list.

    Each input list is assumed sorted best-first; items are identified by
    (repo, path, chunk_index) read from their metadata. The fused result
    carries a 'score_fused' field and is sorted descending on it.
    """
    def _key_of(item: dict) -> str:
        meta = item.get("metadata") or {}
        return f"{meta.get('repo','')}::{meta.get('path','')}::{meta.get('chunk_index','')}"

    lists = ranked_lists or []
    # 1-based rank of each key per channel (duplicates keep the last rank)
    rank_maps = [
        {_key_of(it): pos for pos, it in enumerate(ranking, 1)}
        for ranking in lists
    ]

    scores: dict[str, float] = {}
    sample: dict[str, dict] = {}
    for ranks, ranking in zip(rank_maps, lists):
        for it in ranking:
            key = _key_of(it)
            scores[key] = scores.get(key, 0.0) + 1.0 / (k + ranks.get(key, 10**9))
            sample[key] = it

    fused = []
    for key, val in scores.items():
        merged = dict(sample[key])
        merged["score_fused"] = val
        fused.append(merged)
    fused.sort(key=lambda x: x.get("score_fused", 0.0), reverse=True)
    return fused
def _rrf_from_ranklists(ranklists: List[List[str]], k: int = int(os.getenv("RRF_K", "60"))) -> Dict[str, float]:
"""
Reciprocal Rank Fusion: neemt geordende lijsten (best eerst) en
geeft samengevoegde scores {key: rrf_score}.
"""
acc = defaultdict(float)
for lst in ranklists:
for i, key in enumerate(lst):
acc[key] += 1.0 / (k + i + 1)
return acc
def _path_prior(path: str) -> float:
"""
Light-weight prior per pad. 0..1 schaal. Laravel paden krijgen bonus,
generieke code dirs ook een kleine bonus; binaire/test/asset minder.
"""
p = (path or "").replace("\\", "/").lower()
bonus = 0.0
# Laravel priors
if p.startswith("routes/"): bonus += 0.35
if p.startswith("app/http/controllers/"): bonus += 0.30
if p.startswith("resources/views/"): bonus += 0.25
if p.endswith(".blade.php"): bonus += 0.15
# Generieke priors
if p.startswith(("src/", "app/", "lib/", "pages/", "components/")): bonus += 0.12
if p.endswith((".php",".ts",".tsx",".js",".jsx",".py",".go",".rb",".java",".cs",".vue",".html",".md")):
bonus += 0.05
# Demote obvious low-signal
if "/tests/" in p or p.startswith(("tests/", "test/")): bonus -= 0.10
if p.endswith((".lock",".map",".min.js",".min.css")): bonus -= 0.10
return max(0.0, min(1.0, bonus))
def _safe_json_loads(s: str):
if not s:
return None
t = s.strip()
if t.startswith("```"):
t = re.sub(r"^```(?:json)?", "", t, count=1, flags=re.IGNORECASE).strip()
if t.endswith("```"):
t = t[:-3].strip()
try:
return json.loads(t)
except Exception:
return None
def _tok(s: str) -> List[str]:
return re.findall(r"[A-Za-z0-9_]+", s.lower())
def _jaccard(a: str, b: str) -> float:
A, B = set(_tok(a)), set(_tok(b))
if not A or not B: return 0.0
# heel kleine set-caps (noodrem tegen pathologische inputs)
if len(B) > 8000:
# reduceer B met stabiele (deterministische) sampling op basis van sha1
def _stable_byte(tok: str) -> int:
return hashlib.sha1(tok.encode("utf-8")).digest()[0]
B = {t for t in B if _stable_byte(t) < 64} # ~25% sample
return len(A & B) / max(1, len(A | B))
def _normalize(xs: List[float]) -> List[float]:
if not xs: return xs
lo, hi = min(xs), max(xs)
if hi <= lo: return [0.0]*len(xs)
return [(x - lo) / (hi - lo) for x in xs]
async def enrich_intent(llm_call_fn, messages: List[Dict]) -> Dict:
    """Restructure an unstructured developer question into a compact plan.

    Returns a dict with: task, constraints, file_hints, keywords, acceptance,
    ask (optional). Falls back to safe defaults whenever the LLM response is
    missing or cannot be parsed as JSON.
    """
    # The last user message is the question to restructure.
    user_text = ""
    for msg in reversed(messages):
        if msg.get("role") == "user":
            user_text = msg.get("content", "").strip()
            break

    def _default_spec() -> Dict:
        # Safe fallback: the raw question as task, everything else empty.
        return {
            "task": user_text,
            "constraints": [],
            "file_hints": [],
            "keywords": [],
            "acceptance": [],
            "ask": None,
        }

    sys = ("Je herstructureert een developer-vraag naar JSON. "
           "Geef ALLEEN JSON, geen toelichting.")
    usr = (
        "Zet de essentie van de vraag om naar dit schema:\n"
        "{"
        "\"task\": str, "
        "\"constraints\": [str,...], "
        "\"file_hints\": [str,...], "
        "\"keywords\": [str,...], "
        "\"acceptance\": [str,...], "
        "\"ask\": str|null "
        "}\n\n"
        f"Vraag:\n{user_text}"
    )
    try:
        resp = await llm_call_fn(
            [{"role": "system", "content": sys}, {"role": "user", "content": usr}],
            stream=False, temperature=0.1, top_p=1.0, max_tokens=512
        )
        raw = (resp.get("choices", [{}])[0].get("message", {}) or {}).get("content", "{}")
        spec = _safe_json_loads(raw) or _default_spec()
    except Exception:
        spec = _default_spec()

    # Minimal sanity pass: coerce malformed fields back to the expected types.
    for field in ("constraints", "file_hints", "keywords", "acceptance"):
        if not isinstance(spec.get(field), list):
            spec[field] = []
    if not isinstance(spec.get("task"), str):
        spec["task"] = user_text
    if spec.get("ask") is not None and not isinstance(spec["ask"], str):
        spec["ask"] = None
    return spec
async def expand_queries(llm_call_fn, q: str, k: int = 3) -> List[str]:
    """Ask the LLM for short query variants; always keeps q itself first.

    Disabled via RAG_EXPAND_QUERIES=0/false; any LLM failure or unparseable
    reply degrades gracefully to just [q]. Returns at most k extra variants.
    """
    if str(os.getenv("RAG_EXPAND_QUERIES", "1")).lower() in ("0", "false"):
        return [q]
    sys = "Geef 3-4 korte NL/EN zoekvarianten als JSON array. Geen toelichting."
    usr = f"Bronvraag:\n{q}\n\nAlleen JSON array."
    try:
        resp = await llm_call_fn(
            [{"role": "system", "content": sys}, {"role": "user", "content": usr}],
            stream=False, temperature=0.2, top_p=0.9, max_tokens=240
        )
        raw = (resp.get("choices", [{}])[0].get("message", {}) or {}).get("content", "[]")
        candidates = [str(x).strip() for x in (_safe_json_loads(raw) or []) if str(x).strip()]
        # dedupe case-insensitively, keeping the original query up front
        variants = [q]
        seen_lower = {q.lower()}
        for cand in candidates:
            folded = cand.lower()
            if folded in seen_lower:
                continue
            seen_lower.add(folded)
            variants.append(cand)
        return variants[: max(1, min(len(variants), k + 1))]
    except Exception:
        return [q]
def _sim_from_chroma_distance(d: float|None) -> float:
"""
Converteer (Chroma) distance naar similarity in [0,1]; defensief tegen None/NaN/negatief.
"""
if d is None:
return 0.0
try:
dv = float(d)
except Exception:
dv = 0.0
if not math.isfinite(dv) or dv < 0:
return 0.0
return 1.0 / (1.0 + dv)
async def hybrid_retrieve(
    rag_query_internal_fn,
    query: str,
    *,
    repo: str|None = None,
    profile: str|None = None,
    path_contains: str|None = None,
    per_query_k: int = 30,
    n_results: int = 8,
    alpha: float = 0.6,
    collection_name: str = "code_docs",
    llm_call_fn=None,
) -> List[Dict]:
    """
    Multi-variant retrieval with RRF fusion + path prior.

    Pipeline (stages toggled via env vars):
      1. expand `query` into variants (LLM when `llm_call_fn` is given,
         otherwise cheap heuristics) — RAG_MULTI_EXPAND / RAG_MULTI_K
      2. optionally route each variant into path-filtered buckets — RAG_ROUTE
      3. call `rag_query_internal_fn` per variant/bucket and rank by embedding
         similarity (converted from Chroma distance, times bucket boost)
      4. fuse the per-channel rankings with RRF — RAG_RRF
      5. blend with a lexical (Jaccard) score: alpha*emb + (1-alpha)*lexical,
         plus optional path prior (RAG_PATH_PRIOR_W) and symbol boost
         (RAG_SYM_BOOST)

    Returns: list of dicts (document, metadata, score), best first, capped
    at n_results. Empty/blank queries yield [].
    """
    # Optional query routing + RRF (both enabled by default).
    use_route = str(os.getenv("RAG_ROUTE", "1")).lower() not in ("0", "false")
    use_rrf = str(os.getenv("RAG_RRF", "1")).lower() not in ("0", "false")
    # Optional mini multi-query expansion (default on).
    use_expand = str(os.getenv("RAG_MULTI_EXPAND", "1")).lower() in ("1","true","yes")
    k_variants = max(1, int(os.getenv("RAG_MULTI_K", "3")))
    per_query_k = max(1, int(per_query_k))
    n_results = max(1, int(n_results))
    if not (query or "").strip():
        return []
    # Multi-query variants: LLM-based expansion when available, else heuristics.
    if use_expand:
        if llm_call_fn is not None:
            variants = await expand_queries(llm_call_fn, query, k=k_variants)
        else:
            variants = _simple_variants(query, max_k=k_variants)
    else:
        variants = [query]
    ranked_lists = []  # for RRF (one ranking per variant/bucket channel)
    for qv in variants:
        if use_route:
            buckets = _route_query_buckets(qv)
            for b in buckets:
                # combine the global path_contains hint with the bucket filter
                pc = b.get("path_contains")
                if path_contains and not pc:
                    pc = path_contains
                res = await rag_query_internal_fn(
                    query=b["q"], n_results=per_query_k,
                    collection_name=collection_name,
                    repo=repo, path_contains=pc, profile=profile
                )
                lst = []
                for item in (res or {}).get("results", []):
                    # distance may be missing on older backends; defensive cast
                    dist = item.get("distance", None)
                    try: dist = float(dist) if dist is not None else None
                    except Exception: dist = None
                    emb_sim = _sim_from_chroma_distance(dist) * float(b.get("boost",1.0))
                    lst.append({**item, "emb_sim_routed": emb_sim})
                lst.sort(key=lambda x: x.get("emb_sim_routed",0.0), reverse=True)
                # give RRF enough candidates to see (don't truncate too early)
                ranked_lists.append(lst[:per_query_k])
        else:
            # no routing: query once per variant (consistent scoring/sorting)
            res = await rag_query_internal_fn(
                query=qv, n_results=per_query_k,
                collection_name=collection_name,
                repo=repo, path_contains=path_contains, profile=profile
            )
            lst = []
            for item in (res or {}).get("results", []):
                dist = item.get("distance", None)
                try: dist = float(dist) if dist is not None else None
                except Exception: dist = None
                emb_sim = _sim_from_chroma_distance(dist)
                lst.append({**item, "emb_sim_routed": emb_sim})
            lst.sort(key=lambda x: x.get("emb_sim_routed", 0.0), reverse=True)
            ranked_lists.append(lst[:per_query_k])
    # Fuse the per-channel rankings (or just concatenate when RRF is off).
    items = rrf_fuse_ranked_lists(ranked_lists) if use_rrf else [x for rl in ranked_lists for x in rl]
    if not items:
        return []
    # Simple lexical score over the fused set: take the BEST match across all
    # variants rather than only the main query.
    bm: List[float] = []
    if variants and len(variants) > 1:
        for it in items:
            doc = it.get("document", "") or ""
            bm.append(max((_jaccard(v, doc) for v in variants), default=_jaccard(query, doc)))
    else:
        bm = [_jaccard(query, it.get("document","")) for it in items]
    bm_norm = _normalize(bm)
    out = []
    for i, it in enumerate(items):
        # Embedding-similarity fallback chain: routed -> plain -> from distance.
        emb = (
            float(it.get("emb_sim_routed", 0.0))
            or float(it.get("emb_sim", 0.0))
            or _sim_from_chroma_distance(it.get("distance"))
        )
        score = alpha * emb + (1.0 - alpha) * bm_norm[i]
        meta = (it.get("metadata") or {})
        path = meta.get("path","") or ""
        # Optional: path prior + symbol boost, weighted via env vars.
        pp_w = float(os.getenv("RAG_PATH_PRIOR_W", "0.08"))
        if pp_w > 0.0:
            score += pp_w * _path_prior(path)
        sym_w = float(os.getenv("RAG_SYM_BOOST", "0.04"))
        if sym_w > 0.0:
            # `symbols` metadata may be a comma-separated string or a list.
            syms_raw = meta.get("symbols")
            if isinstance(syms_raw, str):
                syms = [s.strip().lower() for s in syms_raw.split(",") if s.strip()]
            elif isinstance(syms_raw, list):
                syms = [str(s).strip().lower() for s in syms_raw if str(s).strip()]
            else:
                syms = []
            if syms:
                q_terms = set(_tok(query))
                if q_terms & set(syms):
                    score += sym_w
        out.append({**it, "score": float(score)})
    out.sort(key=lambda x: x["score"], reverse=True)
    return out[:int(n_results)]
def assemble_context(chunks: List[Dict], *, max_chars: int = 24000) -> Tuple[str, float]:
    """
    Budgeted stitching of retrieved chunks into a single context string.

    - group chunks per file path
    - per path: keep 1-3 fragments (ordered by chunk_index when available)
    - divide the character budget over paths, weighted toward higher scores
    - keep the Laravel route/controller/view stitching behaviour
    Returns: (context_text, top_score)
    """
    if not chunks:
        return "", 0.0
    # 1) Group per path and collect scores + (optional) chunk_index
    by_path: Dict[str, List[Dict]] = {}
    top_score = 0.0
    for r in chunks:
        meta = (r.get("metadata") or {})
        path = meta.get("path","") or ""
        # NOTE(review): mutates the caller's chunk dicts in place (_chunk_index/_score)
        r["_chunk_index"] = meta.get("chunk_index")
        r["_score"] = float(r.get("score", 0.0) or 0.0)
        top_score = max(top_score, r["_score"])
        by_path.setdefault(path, []).append(r)
    # 2) Per path: sort on chunk_index (when available) else on score; cap at N pieces
    def _sort_key(x):
        ci = x.get("_chunk_index")
        # Indexed pieces first in document order; unindexed pieces after, best score first.
        return (0, int(ci)) if isinstance(ci, int) or (isinstance(ci, str) and str(ci).isdigit()) else (1, -x["_score"])
    path_items = []
    max_pieces = int(os.getenv("CTX_PIECES_PER_PATH_CAP", "3"))
    for p, lst in by_path.items():
        lst_sorted = sorted(lst, key=_sort_key)
        path_items.append({
            "path": p,
            "best_score": max(x["_score"] for x in lst_sorted),
            "pieces": lst_sorted[:max(1, max_pieces)],  # cap per file
        })
    # 3) Sort paths on best_score and compute the budget split (softmax-like, but bounded)
    path_items.sort(key=lambda t: t["best_score"], reverse=True)
    # Clamp scores to [0,1] for a more stable allocation.
    scores = [min(1.0, max(0.0, t["best_score"])) for t in path_items]
    # softmax-lite: normalize exp(score*beta); beta kept lowish to avoid overly sharp splits
    beta = float(os.getenv("CTX_ALLOC_BETA", "2.2"))
    w = [math.exp(beta * s) for s in scores]
    S = max(1e-9, sum(w))
    weights = [x / S for x in w]
    # 4) Fast lookup path -> first fragment body (used by the Laravel stitching)
    by_path_first_body: Dict[str, str] = {}
    for t in path_items:
        doc0 = (t["pieces"][0].get("document") or "").strip()
        by_path_first_body[t["path"]] = doc0
    # 5) Render with a per-path character budget
    out = []
    used = 0
    for t, w_i in zip(path_items, weights):
        p = t["path"]
        # Minimum & maximum budget per path (chars).
        min_chars = int(os.getenv("CTX_ALLOC_MIN_PER_PATH", "1200"))
        max_chars_path = int(os.getenv("CTX_ALLOC_MAX_PER_PATH", "6000"))
        alloc = min(max(min_chars, int(max_chars * w_i)), max_chars_path)
        # Stitch 1..3 pieces of this path within alloc.
        header = f"### {p} (score={t['best_score']:.3f})"
        block_buf = [header]
        remaining = max(0, alloc - len(header) - 1)
        for piece in t["pieces"]:
            body = (piece.get("document") or "").strip()
            # Don't cut mid-line: take up to `remaining` and roll back to the last newline.
            if remaining <= 0:
                break
            if len(body) > remaining:
                cut = body[:remaining]
                nl = cut.rfind("\n")
                if nl > 300:  # don't leave an overly short fragment
                    body = cut[:nl] + "\n"
                else:
                    body = cut + ""
            block_buf.append(body)
            remaining -= len(body)
            if remaining <= 300:  # keep some room for stitching
                break
        block = "\n".join(block_buf)
        # --- Laravel mini-stitch as before, but budget-aware
        stitched = []
        if p in ("routes/web.php", "routes/api.php"):
            # Pull in referenced controllers when their bodies were also retrieved.
            for ctrl_path, _meth in _laravel_pairs_from_route_text(by_path_first_body.get(p,"")):
                if ctrl_path in by_path_first_body and remaining > 400:
                    snippet = by_path_first_body[ctrl_path][:min(400, remaining)]
                    stitched.append(f"\n### {ctrl_path} (stitch)\n{snippet}")
                    remaining -= len(snippet)
        if p.startswith("app/Http/Controllers/"):
            # Pull in referenced Blade views when their bodies were also retrieved.
            for vpath in _laravel_guess_view_paths_from_text(by_path_first_body.get(p,"")):
                if vpath in by_path_first_body and remaining > 400:
                    snippet = by_path_first_body[vpath][:min(400, remaining)]
                    stitched.append(f"\n### {vpath} (stitch)\n{snippet}")
                    remaining -= len(snippet)
        if stitched:
            block += "\n" + "\n".join(stitched)
        # If the whole block no longer fits, trim it instead of dropping it entirely.
        remaining_total = max_chars - used
        if remaining_total <= 0:
            break
        if len(block) > remaining_total:
            # Avoid cutting in the middle of a markdown header.
            trimmed = block[:max(0, remaining_total - 1)]
            block = trimmed + ""
            out.append(block)
            used = max_chars
            break
        else:
            out.append(block)
            used += len(block)
        # Stop early when the budget is nearly exhausted.
        if max_chars - used < 800:
            break
    return ("\n\n".join(out), float(top_score))
# --- Laravel route/controller/view helpers (lightweight, cycle-safe) ---
def _laravel_pairs_from_route_text(route_text: str):
"""
Parse routes/web.php|api.php tekst en yield (controller_path, method) guesses.
Ondersteunt:
- 'Controller@method'
- FQCN zoals App\\Http\\Controllers\\Foo\\BarController::class
"""
out = []
# 1) 'Controller@method'
for m in re.finditer(r"['\"]([A-Za-z0-9_\\]+)@([A-Za-z0-9_]+)['\"]", route_text):
fq = m.group(1)
method = m.group(2)
ctrl = fq.replace("\\\\","/").replace("\\","/")
name = ctrl.split("/")[-1]
guess = f"app/Http/Controllers/{ctrl}.php"
alt = f"app/Http/Controllers/{name}.php"
out.append((guess, method))
out.append((alt, method))
# 2) FQCN ::class
for m in re.finditer(r"([A-Za-z_][A-Za-z0-9_\\]+)\s*::\s*class", route_text):
fq = m.group(1)
ctrl = fq.replace("\\\\","/").replace("\\","/")
name = ctrl.split("/")[-1]
guess = f"app/Http/Controllers/{ctrl}.php"
alt = f"app/Http/Controllers/{name}.php"
out.append((guess, None))
out.append((alt, None))
# dedupe, behoud orde
seen = set(); dedup = []
for p in out:
if p not in seen:
seen.add(p); dedup.append(p)
return dedup
def _laravel_guess_view_paths_from_text(controller_text: str):
"""
Parse simpele 'return view(\"foo.bar\")' patronen resources/views/foo/bar.blade.php
"""
out = []
for m in re.finditer(r"view\(\s*['\"]([A-Za-z0-9_.\/-]+)['\"]\s*\)", controller_text):
view = m.group(1).strip().strip(".")
# 'foo.bar' of 'foo/bar'
path = view.replace(".", "/")
out.append(f"resources/views/{path}.blade.php")
# dedupe
seen = set(); dedup = []
for p in out:
if p not in seen:
seen.add(p); dedup.append(p)
return dedup
# Public API surface of this module. The underscore-prefixed Laravel helpers
# are listed too — presumably so `from module import *` exposes them to
# tests/tools despite the leading underscore; confirm that is intentional.
__all__ = [
    "enrich_intent",
    "expand_queries",
    "hybrid_retrieve",
    "assemble_context",
    "_laravel_pairs_from_route_text",
    "_laravel_guess_view_paths_from_text",
]

1
toolserver.sh Executable file
View File

@ -0,0 +1 @@
# Launch the toolserver container on host networking with GPU 0.
# Fixes vs. the original one-liner:
#   - `-e RAG_EMB_WEIGHT=0.6` was passed twice (identical value) — deduplicated.
#   - `-e NO_PROXY` was passed twice with different values; with repeated `-e`
#     flags docker keeps the LAST one, so only the effective value is kept.
# NOTE(review): MEILI_KEY is a credential committed in this script — move it to
# an env file / secret store (e.g. `docker run --env-file`) instead.
docker run -d --rm --name toolserver --network host \
  -v /opt/SentenceTransformer:/opt/sentence-transformers \
  -v /opt/piper/voices:/voices:ro \
  --gpus device=0 \
  -e CUDA_VISIBLE_DEVICES=0 \
  -e LLM_TOOL_RUNNER=bridge \
  -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" \
  -e LLM_MAX_CONCURRENCY=2 \
  -e LLM_URL="http://localhost:8000/v1/chat/completions" \
  -e LLM_PROXY_URL="http://192.168.100.1:8081/v1/chat/completions" \
  -e LLM_WINDOWING_ENABLE=0 \
  -e LLM_CONTEXT_TOKENS=42000 \
  -e LLM_RESPONSE_RESERVE=1024 \
  -e LLM_AUTO_CONTINUES=0 \
  -e LLM_FUNCTION_CALLING_MODE=auto \
  -e LLM_READ_TIMEOUT=3600 \
  -e REPO_AGENT_SMART=1 \
  -e REPO_AGENT_CONTEXT_CHARS=24000 \
  -e REPO_AGENT_ASK_CLARIFY=1 \
  -e REPO_AGENT_ASK_THRESHOLD=0.35 \
  -e RAG_EXPAND_QUERIES=1 \
  -e RAG_EXPAND_K=3 \
  -e RAG_PER_QUERY_K=30 \
  -e RAG_N_RESULT=8 \
  -e RAG_EMB_WEIGHT=0.6 \
  -e RAG_TORCH_THREADS=6 \
  -e OMP_NUM_THREADS=6 \
  -e MKL_NUM_THREADS=6 \
  -e OPENBLAS_NUM_THREADS=6 \
  -e NUMEXPR_NUM_THREADS=6 \
  -e PIPER_BIN=/usr/local/bin/piper \
  -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz \
  -e NO_PROXY=localhost,127.0.0.1,::1,192.168.100.1,192.168.100.2 \
  -e HTTP_PROXY=http://192.168.100.2:8118 \
  -e HTTPS_PROXY=http://192.168.100.2:8118 \
  -e MEILI_URL=http://localhost:7700 \
  -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ \
  -e FORCE_ALL_TOOLS=0 \
  -e AUTO_CONTINUE=0 \
  -e ALLOWED_GIT_HOSTS="192.168.100.1,localhost,127.0.0.1,10.25.138.40" \
  -e STREAM_PREFER_DIRECT=1 \
  toolserver

BIN
toolserver_toolset_v2.zip Normal file

Binary file not shown.

333
web_search.py Normal file
View File

@ -0,0 +1,333 @@
import os
import requests
from datetime import datetime
import json
import logging
from requests import get
from bs4 import BeautifulSoup
import concurrent.futures
from html.parser import HTMLParser
from urllib.parse import urlparse, urljoin
import re
import unicodedata
from pydantic import BaseModel, Field
import asyncio
from typing import Any, List, Optional
import httpx
logger = logging.getLogger("web_search")
async def call_improve_web_query(text: str, max_chars: int = 20000, objective: Optional[str] = None, style: Optional[str] = None):
    """
    Ask the local improve_web_query endpoint to rewrite a web-search query.

    :param text: Raw query text to improve.
    :param max_chars: Upper bound forwarded to the service.
    :param objective: Optional objective hint; only sent when truthy.
    :param style: Optional style hint; only sent when truthy.
    :return: Decoded JSON response from the service.
    :raises httpx.HTTPStatusError: when the service answers with a 4xx/5xx status.
    """
    url = "http://localhost:8080/improve_web_query"
    params = {"text": text, "max_chars": max_chars}
    if objective:
        params["objective"] = objective
    if style:
        params["style"] = style
    async with httpx.AsyncClient() as client:
        response = await client.get(url, params=params)
        # Fail loudly on HTTP errors instead of trying to JSON-decode an error page,
        # which previously surfaced as a confusing decode exception.
        response.raise_for_status()
        return response.json()
class HelpFunctions:
    """Small utilities for fetching, cleaning and trimming web page content."""

    def __init__(self):
        pass

    def get_base_url(self, url: str) -> str:
        """Return scheme://netloc for the given URL."""
        parts = urlparse(url)
        return f"{parts.scheme}://{parts.netloc}"

    def generate_excerpt(self, content: str, max_length: int = 200) -> str:
        """First max_length characters, with a trailing ellipsis when truncated."""
        if len(content) > max_length:
            return content[:max_length] + "..."
        return content

    def format_text(self, original_text: str) -> str:
        """Strip HTML, normalize Unicode, collapse whitespace and drop emoji."""
        plain = BeautifulSoup(original_text, "html.parser").get_text(separator=" ", strip=True)
        plain = unicodedata.normalize("NFKC", plain)
        plain = re.sub(r"\s+", " ", plain).strip()
        return self.remove_emojis(plain)

    def remove_emojis(self, text: str) -> str:
        """Drop every character in Unicode category 'So' (symbols, incl. emoji)."""
        kept = [ch for ch in text if not unicodedata.category(ch).startswith("So")]
        return "".join(kept)

    def process_search_result(self, result: dict, valves: Any) -> Optional[dict]:
        """Download one search hit and return a cleaned record, or None when skipped."""
        title_clean = self.remove_emojis(result["title"])
        link = result["url"]
        snippet = result.get("content", "")
        # Honour the ignore-list (substring match against scheme://host),
        # but only when IGNORED_WEBSITES is non-empty.
        if valves.IGNORED_WEBSITES:
            host = self.get_base_url(link)
            blocked = any(
                site.strip() in host
                for site in valves.IGNORED_WEBSITES.split(",")
            )
            if blocked:
                return None
        try:
            page = requests.get(link, timeout=20)
            page.raise_for_status()
            parsed = BeautifulSoup(page.text, "html.parser")
            cleaned = self.format_text(parsed.get_text(separator=" ", strip=True))
            return {
                "title": title_clean,
                "url": link,
                "content": self.truncate_to_n_words(cleaned, valves.PAGE_CONTENT_WORDS_LIMIT),
                "snippet": self.remove_emojis(snippet),
            }
        except requests.exceptions.RequestException:
            # Unreachable/broken pages are silently dropped from the result set.
            return None

    def truncate_to_n_words(self, text: str, token_limit: int) -> str:
        """Keep only the first token_limit whitespace-separated words."""
        return " ".join(text.split()[:token_limit])
class EventEmitter:
    """Thin wrapper around an optional async status callback."""

    def __init__(self, event_emitter: Optional[Any] = None):
        self.event_emitter = event_emitter

    async def emit(self, description: str = "Unknown State", status: str = "in_progress", done: bool = False):
        """Send a status event when a callback is configured; no-op otherwise."""
        if not self.event_emitter:
            return
        payload = {
            "type": "status",
            "data": {
                "status": status,
                "description": description,
                "done": done,
            },
        }
        await self.event_emitter(payload)
class Tools:
    """SearXNG-backed web search and page-fetch tools with optional status events."""

    class Valves(BaseModel):
        # Tunable settings exposed to the host application via pydantic fields.
        SEARXNG_ENGINE_API_BASE_URL: str = Field(
            default="http://192.168.100.1:8899/search",
            description="The base URL for SearXNG Search Engine",
        )
        IGNORED_WEBSITES: str = Field(
            default="",
            description="Comma-separated list of websites to ignore",
        )
        RETURNED_SCRAPPED_PAGES_NO: int = Field(
            default=3,
            description="The number of Search Engine Results to Parse",
        )
        SCRAPPED_PAGES_NO: int = Field(
            default=5,
            description="Total pages scapped. Ideally greater than one of the returned pages",
        )
        PAGE_CONTENT_WORDS_LIMIT: int = Field(
            default=5000,
            description="Limit words content for each page.",
        )
        CITATION_LINKS: bool = Field(
            default=False,
            description="If True, send custom citations with links",
        )

    def __init__(self):
        self.valves = self.Valves()
        # Desktop-browser User-Agent so sites serve their regular HTML.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }

    async def search_web(
        self,
        query: str
    ) -> str:
        """
        Search the web using SearXNG and get the content of the relevant pages.
        :param query: Web Query used in search engine.
        :param __event_emitter__: Optional event emitter for status updates.
        :return: The content of the pages in json format.
        """
        logger.info(" in query: %s",query)
        try:
            # Query-improvement hook is currently disabled; the call is kept for reference.
            #query=asyncio.run(call_improve_web_query(query))
            pass
        except Exception as e:
            logger.error("ERROR: %s",str(e))
        logger.info(" out query: %s",query)
        # NOTE(review): hardcoded to None, so every emitter/citation branch below is
        # effectively a no-op — confirm whether events should be wired up again.
        __event_emitter__=None
        functions = HelpFunctions()
        emitter = EventEmitter(__event_emitter__)
        await emitter.emit(description=f"Initiating web search for: {query}")
        search_engine_url = self.valves.SEARXNG_ENGINE_API_BASE_URL
        # Ensure RETURNED_SCRAPPED_PAGES_NO does not exceed SCRAPPED_PAGES_NO
        # NOTE(review): this mutates self.valves, so the clamp persists across calls.
        if self.valves.RETURNED_SCRAPPED_PAGES_NO > self.valves.SCRAPPED_PAGES_NO:
            self.valves.RETURNED_SCRAPPED_PAGES_NO = self.valves.SCRAPPED_PAGES_NO
        params = {
            "q": query,
            "format": "json",
            "number_of_results": self.valves.RETURNED_SCRAPPED_PAGES_NO,
        }
        try:
            await emitter.emit(description="Sending request to search engine")
            resp = requests.get(
                search_engine_url, params=params, headers=self.headers, timeout=120
            )
            logger.info("query : %s", query)
            logger.info("REQUEST URL: %s", resp.url)
            resp.raise_for_status()
            data = resp.json()
            results = data.get("results", [])
            # Fetch a few extra results so failed downloads can be compensated for.
            limited_results = results[: self.valves.SCRAPPED_PAGES_NO]
            await emitter.emit(description=f"Retrieved {len(limited_results)} search results")
        except requests.exceptions.RequestException as e:
            await emitter.emit(
                status="error",
                description=f"Error during search: {str(e)}",
                done=True,
            )
            # Errors are returned to the caller as a JSON object, not raised.
            return json.dumps({"error": str(e)})
        results_json = []
        if limited_results:
            await emitter.emit(description=f"Processing search results")
            # Download result pages concurrently; results arrive in completion
            # order, not in search-rank order.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(
                        functions.process_search_result, result, self.valves
                    )
                    for result in limited_results
                ]
                for future in concurrent.futures.as_completed(futures):
                    result_json = future.result()
                    if result_json:
                        try:
                            # Keep only JSON-serializable records.
                            json.dumps(result_json)
                            results_json.append(result_json)
                        except (TypeError, ValueError):
                            continue
                    if len(results_json) >= self.valves.RETURNED_SCRAPPED_PAGES_NO:
                        break
            results_json = results_json[: self.valves.RETURNED_SCRAPPED_PAGES_NO]
        if self.valves.CITATION_LINKS and __event_emitter__:
            # Emit one citation event per scraped page (dead code while the
            # emitter above is forced to None).
            for result in results_json:
                await __event_emitter__(
                    {
                        "type": "citation",
                        "data": {
                            "document": [result["content"]],
                            "metadata": [{"source": result["url"]}],
                            "source": {"name": result["title"]},
                        },
                    }
                )
        await emitter.emit(
            status="complete",
            description=f"Web search completed. Retrieved content from {len(results_json)} pages",
            done=True,
        )
        return json.dumps(results_json, ensure_ascii=False)

    async def get_website(
        self, url: str
    ) -> str:
        """
        Web scrape the website provided and get the content of it.
        :param url: The URL of the website.
        :param __event_emitter__: Optional event emitter for status updates.
        :return: The content of the website in json format.
        """
        # NOTE(review): hardcoded to None — status/citation events are disabled here too.
        __event_emitter__=None
        functions = HelpFunctions()
        emitter = EventEmitter(__event_emitter__)
        if 'http' not in url:
            #assume that a https:// prepend is needed.
            # NOTE(review): substring check — a URL like "example.com/httpdocs" also
            # passes; consider url.startswith(("http://", "https://")) instead.
            url='https://'+url
        await emitter.emit(description=f"Fetching content from URL: {url}")
        results_json = []
        try:
            response_site = requests.get(url, headers=self.headers, timeout=120)
            response_site.raise_for_status()
            html_content = response_site.text
            await emitter.emit(description="Parsing website content")
            soup = BeautifulSoup(html_content, "html.parser")
            page_title = soup.title.string if soup.title else "No title found"
            page_title = unicodedata.normalize("NFKC", page_title.strip())
            page_title = functions.remove_emojis(page_title)
            title_site = page_title
            url_site = url
            content_site = functions.format_text(
                soup.get_text(separator=" ", strip=True)
            )
            truncated_content = functions.truncate_to_n_words(
                content_site, self.valves.PAGE_CONTENT_WORDS_LIMIT
            )
            result_site = {
                "title": title_site,
                "url": url_site,
                "content": truncated_content,
                "excerpt": functions.generate_excerpt(content_site),
            }
            results_json.append(result_site)
            if self.valves.CITATION_LINKS and __event_emitter__:
                await __event_emitter__(
                    {
                        "type": "citation",
                        "data": {
                            "document": [truncated_content],
                            "metadata": [{"source": url_site}],
                            "source": {"name": title_site},
                        },
                    }
                )
            await emitter.emit(
                status="complete",
                description="Website content retrieved and processed successfully",
                done=True,
            )
        except requests.exceptions.RequestException as e:
            # Failures are reported inline in the JSON payload rather than raised.
            results_json.append(
                {
                    "url": url,
                    "content": f"Failed to retrieve the page. Error: {str(e)}",
                }
            )
            await emitter.emit(
                status="error",
                description=f"Error fetching website content: {str(e)}",
                done=True,
            )
        return json.dumps(results_json, ensure_ascii=False)

172
windowing_utils.py Normal file
View File

@ -0,0 +1,172 @@
# windowing_utils.py
from __future__ import annotations
from dataclasses import dataclass, field
from typing import List, Dict, Callable, Optional, Tuple, Awaitable
import hashlib
import os
import time
# ---------- Token counting (vervang door echte tokenizer indien je wilt)
def approx_token_count(text: str) -> int:
    """Rough token estimate: ~4 chars ≈ 1 token, never less than 1."""
    estimate = len(text) // 4
    return estimate if estimate > 0 else 1
def count_message_tokens(messages: List[Dict], tok_len: Callable[[str], int]) -> int:
    """Sum tok_len over the 'content' field of every message (missing → '')."""
    return sum(tok_len(msg.get("content", "")) for msg in messages)
# ---------- Thread ID + summary store
def derive_thread_id(body: Dict) -> str:
    """
    Stable per-conversation identifier.

    Prefer an explicit id field from the request body; otherwise hash the
    model name plus the first two messages into a 16-hex-char fingerprint.
    """
    for key in ("conversation_id", "thread_id", "chat_id", "session_id", "room_id"):
        value = body.get(key)
        # Falsy ids (0, "", None) are deliberately skipped.
        if value:
            return str(value)
    fingerprint = [str(body.get("model", ""))]
    for msg in body.get("messages", [])[:2]:
        fingerprint.append(msg.get("role", ""))
        fingerprint.append(msg.get("content", "")[:256])
    digest = hashlib.sha256("||".join(fingerprint).encode("utf-8")).hexdigest()
    return digest[:16]
class RunningSummaryStore:
    """In-memory map of thread-id -> running conversation summary."""

    def __init__(self):
        # No persistence: summaries live only as long as the process.
        self._mem: dict[str, str] = {}

    def get(self, thread_id: str) -> str:
        """Return the stored summary, or '' when the thread is unknown."""
        return self._mem.get(thread_id, "")

    def update(self, thread_id: str, new_summary: str):
        """Overwrite the summary for this thread."""
        self._mem[thread_id] = new_summary


# Module-level singleton shared by all windows in this process.
SUMMARY_STORE = RunningSummaryStore()
# ---------- Sliding window + running summary
@dataclass
class ConversationWindow:
    """
    Token-budgeted chat history: keeps a sliding window of turns plus an
    optional running summary of everything trimmed away.
    """
    max_ctx_tokens: int  # total context budget in tokens
    response_reserve: int = 2048  # tokens held back for the model's reply
    tok_len: Callable[[str], int] = approx_token_count  # token counter for a string
    running_summary: str = ""  # summary of turns trimmed out of the window
    summary_header: str = "Samenvatting tot nu toe"  # header placed above the summary
    history: List[Dict] = field(default_factory=list)  # full turn list of {"role", "content"}

    def add(self, role: str, content: str):
        # Append one turn to the history.
        self.history.append({"role": role, "content": content})

    def _base_messages(self, system_prompt: Optional[str]) -> List[Dict]:
        # System prompt + (when present) the running-summary system message.
        msgs: List[Dict] = []
        if system_prompt:
            msgs.append({"role": "system", "content": system_prompt})
        if self.running_summary:
            msgs.append({"role": "system", "content": f"{self.summary_header}:\n{self.running_summary}"})
        return msgs

    async def build_within_budget(
        self,
        system_prompt: Optional[str],
        summarizer: Optional[Callable[[str, List[Dict]], Awaitable[str]]] = None
    ) -> List[Dict]:
        """
        Build a message list that fits within max_ctx_tokens - response_reserve.

        Strategy: return everything when it fits; otherwise drop the oldest
        turns; when a summarizer coroutine is supplied, fold dropped turns into
        running_summary instead of discarding them. Mutates self.history (and
        possibly self.running_summary) as a side effect.
        """
        budget = self.max_ctx_tokens - max(1, self.response_reserve)
        working = self.history[:]
        candidate = self._base_messages(system_prompt) + working
        if count_message_tokens(candidate, self.tok_len) <= budget:
            return candidate
        # 1) trim oldest turns
        while working and count_message_tokens(self._base_messages(system_prompt) + working, self.tok_len) > budget:
            working.pop(0)
        candidate = self._base_messages(system_prompt) + working
        if count_message_tokens(candidate, self.tok_len) <= budget:
            self.history = working
            return candidate
        # 2) summarize when possible
        if summarizer is None:
            # No summarizer: plain trimming is the only option.
            while working and count_message_tokens(self._base_messages(system_prompt) + working, self.tok_len) > budget:
                working.pop(0)
            self.history = working
            return self._base_messages(system_prompt) + working
        # Summarize in batches — restart from the full history so trimmed turns
        # are folded into the summary rather than lost.
        working = self.history[:]
        chunk_buf: List[Dict] = []
        async def build_candidate(_summary: str, _working: List[Dict]) -> List[Dict]:
            # Like _base_messages but against a candidate summary, not self state.
            base = []
            if system_prompt:
                base.append({"role": "system", "content": system_prompt})
            if _summary:
                base.append({"role": "system", "content": f"{self.summary_header}:\n{_summary}"})
            return base + _working
        while working and count_message_tokens(await build_candidate(self.running_summary, working), self.tok_len) > budget:
            chunk_buf.append(working.pop(0))
            # summarize once the buffer holds ~1500 tokens (rough, via str(list)) or nothing is left
            if count_message_tokens([{"role":"system","content":str(chunk_buf)}], self.tok_len) > 1500 or not working:
                self.running_summary = await summarizer(self.running_summary, chunk_buf)
                chunk_buf = []
        # Flush any leftover buffer so no turns silently disappear.
        if chunk_buf:
            self.running_summary = await summarizer(self.running_summary, chunk_buf)
            chunk_buf = []
        self.history = working
        return await build_candidate(self.running_summary, working)
# ---------- Repo chunking
from typing import Iterable
def split_text_tokens(
    text: str,
    tok_len: Callable[[str], int],
    max_tokens: int,
    overlap_tokens: int = 60
) -> List[str]:
    """
    Split text into chunks of at most ~max_tokens tokens, with optional
    character-level overlap between consecutive chunks.
    """
    total_tokens = tok_len(text)
    if total_tokens <= max_tokens:
        return [text]
    # Initial character window sized from the token/char ratio (min 1000 chars).
    ratio = max_tokens / max(1, total_tokens)
    window = max(1000, int(len(text) * ratio))
    pieces: List[str] = []
    pos = 0
    while pos < len(text):
        piece = text[pos:pos + window]
        # Shave 200-char tails until the piece fits the token budget
        # (pieces of <= 200 chars are accepted as-is).
        while tok_len(piece) > max_tokens and len(piece) > 200:
            piece = piece[:-200]
        pieces.append(piece)
        if overlap_tokens > 0:
            # Overlap expressed in characters (~4 chars per token, min 100).
            overlap_chars = max(100, overlap_tokens * 4)
            pos += max(1, len(piece) - overlap_chars)
        else:
            pos += len(piece)
    return pieces
def fit_context_under_budget(
    items: List[Tuple[str,str]], tok_len: Callable[[str], int], budget_tokens: int
) -> List[Tuple[str,str]]:
    """
    Greedily keep leading (title, text) items while their cumulative token
    count stays within budget; stop at the first item that would overflow.
    """
    kept: List[Tuple[str, str]] = []
    spent = 0
    for entry in items:
        cost = tok_len(entry[1])
        if spent + cost > budget_tokens:
            break
        kept.append(entry)
        spent += cost
    return kept
def build_repo_context(
    files_ranked: List[Tuple[str, str, float]],
    per_chunk_tokens: int = 2100,
    overlap_tokens: int = 60,
    ctx_budget_tokens: int = 5000,
    tok_len: Callable[[str], int] = approx_token_count
) -> str:
    """
    Chunk ranked repo files and stitch as many chunks as fit the token budget
    into one '=== path#chunkN ===' annotated context string.
    """
    labeled: List[Tuple[str, str]] = []
    for path, content, _score in files_ranked:
        parts = split_text_tokens(content, tok_len, per_chunk_tokens, overlap_tokens)
        labeled.extend((f"{path}#chunk{idx + 1}", part) for idx, part in enumerate(parts))
    chosen = fit_context_under_budget(labeled, tok_len, ctx_budget_tokens)
    blocks = [f"\n\n=== {title} ===\n{part}" for title, part in chosen]
    return "".join(blocks).strip()