first commit
commit cbfddf128e
Dockerfile (new file, 108 lines)
@@ -0,0 +1,108 @@
# ===== Base with CUDA 11.8 + cuDNN + conda =====
FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-runtime

WORKDIR /app

# Make sure conda libs are always found first
ENV LD_LIBRARY_PATH=/opt/conda/lib:${LD_LIBRARY_PATH}

# P5000 = Pascal SM 6.1; useful for (possible) on-the-fly builds
ENV TORCH_CUDA_ARCH_LIST="6.1"

# ===== Model caches at fixed paths =====
ENV HF_HOME=/opt/hf \
    HUGGINGFACE_HUB_CACHE=/opt/hf \
    TRANSFORMERS_CACHE=/opt/hf \
    SENTENCE_TRANSFORMERS_HOME=/opt/sentence-transformers \
    XDG_CACHE_HOME=/opt/cache \
    STT_MODEL=small

ARG RAG_EMBEDDINGS=gte-multilingual
ARG STT_MODEL_ARG=small
ENV RAG_EMBEDDINGS=${RAG_EMBEDDINGS}
ENV STT_MODEL=${STT_MODEL_ARG}

# directories
RUN mkdir -p /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper && \
    chmod -R a+rX /opt/hf /opt/cache /opt/sentence-transformers /opt/whisper

# ===== Minimal apt utilities only (no multimedia libs!) =====
RUN apt-get update && DEBIAN_FRONTEND=noninteractive \
    apt-get install -y --no-install-recommends \
    git curl build-essential ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# ===== Multimedia via conda-forge (everything from one ecosystem) =====
# - av 10 + ffmpeg<7 (pairs well with PyAV)
# - cairo/pango/gdk-pixbuf/pixman for the cairosvg stack
# Tooling for the PyAV build
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    pkg-config git curl ffmpeg libcairo2 libpango-1.0-0 libgdk-pixbuf2.0-0 apt-utils \
    libavformat-dev libavcodec-dev libavdevice-dev libavutil-dev libavfilter-dev \
    libswscale-dev libswresample-dev build-essential \
    && rm -rf /var/lib/apt/lists/*

# FFmpeg via conda-forge (so you get recent headers/libs)
RUN conda config --system --set channel_priority flexible \
    && conda install -y -c conda-forge "ffmpeg>=6,<8" \
    && conda clean -afy

# Later, in the pip step:
# ... faster-whisper==1.0.0 will pull in av==11.* and can now actually build against conda's FFmpeg 6

# ===== Python deps =====
COPY requirements.txt .
RUN pip install --upgrade pip
# project requirements
RUN pip install --no-cache-dir -r requirements.txt
# loose extras (note: av comes via conda, not via pip!)
RUN pip install --no-cache-dir \
    PyPDF2 python-multipart gitpython chromadb httpx meilisearch \
    pandas openpyxl python-pptx faster-whisper==1.0.0 \
    cairosvg sentence-transformers rank-bm25

# ===== Prefetch models =====

# 1) SentenceTransformers
RUN python - <<'PY'
import os
from sentence_transformers import SentenceTransformer
mapping = {
    "gte-multilingual": "Alibaba-NLP/gte-multilingual-base",
    "bge-small": "BAAI/bge-small-en-v1.5",
    "e5-small": "intfloat/e5-small-v2",
    "gte-base-en": "thenlper/gte-base",
}
choice = os.environ.get("RAG_EMBEDDINGS", "gte-multilingual").lower()
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
cache_root = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
local_dir = os.path.join(cache_root, "embedder")
os.makedirs(cache_root, exist_ok=True)
print("Downloading SentenceTransformer:", hf_id)
model = SentenceTransformer(hf_id, cache_folder=cache_root, device="cpu")  # download only
model.save(local_dir)
print("Prefetched SentenceTransformer:", hf_id)
PY

# 2) faster-whisper (prefetch the CPU side; the device can be chosen at runtime)
RUN python - <<'PY'
import os
from faster_whisper import WhisperModel
name = os.environ.get("STT_MODEL", "small")
cache_root = os.path.join(os.environ.get("XDG_CACHE_HOME", "/opt/cache"), "whisper")
os.makedirs(cache_root, exist_ok=True)
_ = WhisperModel(name, device="cpu", compute_type="int8", download_root=cache_root)
print("Prefetched faster-whisper:", name, "->", cache_root)
PY

# (optional) piper is skipped here; can be added later

# ===== App code =====
COPY app.py .
COPY queue_helper.py .
COPY agent_repo.py .
COPY windowing_utils.py .
COPY smart_rag.py .
COPY llm_client.py .
COPY web_search.py .

EXPOSE 8080
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]
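The fixed cache paths above mean the container can serve both models without network access at runtime. A minimal sketch of a startup check, run inside the container; note that the HF_HUB_OFFLINE flag is an assumption for illustration and is not set in this Dockerfile:

```python
# Sketch: verify that both prefetched models load from the fixed cache paths.
# HF_HUB_OFFLINE=1 is a hypothetical addition here to force offline loading.
import os
os.environ.setdefault("HF_HUB_OFFLINE", "1")

from sentence_transformers import SentenceTransformer
from faster_whisper import WhisperModel

# The embedder was saved to <SENTENCE_TRANSFORMERS_HOME>/embedder by the prefetch step.
embedder = SentenceTransformer("/opt/sentence-transformers/embedder", device="cpu")
print("embedder dim:", embedder.get_sentence_embedding_dimension())

# faster-whisper was prefetched into <XDG_CACHE_HOME>/whisper.
stt = WhisperModel(os.environ.get("STT_MODEL", "small"), device="cpu",
                   compute_type="int8", download_root="/opt/cache/whisper")
print("whisper model loaded")
```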
README.md (new file, 33 lines)
@@ -0,0 +1,33 @@
# Toolserver repo toolset v2

This patch adds a clearer repo toolset, including a macro tool that performs the whole "modify repo and push to a test branch" process in a single tool call.

## New (recommended) tools

- `repo_open(repo_url, branch="main") -> {workspace_id,...}`
- `repo_search(query, mode="auto|grep|rag", workspace_id? / repo_url?, n_results=20, ...)`
- `repo_read(path, workspace_id? / repo_url?, start_line?, end_line?)`
- `repo_apply(workspace_id? / repo_url?, patch_b64|patch|files, dry_run=false)`
- `repo_push(workspace_id? / repo_url?, base_branch="main", new_branch?, branch_prefix="test", commit_message=...)`
- `repo_pr_create(repo_url, head_branch, base_branch="main", title, body?)`
- `repo_change_to_branch(repo_url, base_branch="main", patch_b64|patch|files, new_branch?/branch_prefix, commit_message, create_pr?, pr_title?, pr_body?)`

## Deprecated (still works)

- `repo_grep` -> alias for `repo_search(mode="grep")`
- `rag_index_repo` -> usually no longer needed; `repo_search` indexes automatically
- `rag_query` -> use `repo_search(mode="rag")`

## Important env vars

- `LLM_PROXY_URL=http://192.168.100.1:8081/v1/completions` (or `/v1/chat/completions` if your proxy offers it)
- `GITEA_TOKEN=...` (only needed for PR creation, and optionally for push auth if your repo_url contains no credentials)
- `GITEA_USER=oauth2` (optional; the default is oauth2 for token auth)
- `ALLOWED_GIT_HOSTS=10.25.138.40,192.168.100.1` (recommended; empty = everything allowed)

## Recommended flow for the LLM

1) `repo_search(mode="auto")` to find the relevant files
2) `repo_read` to read those files
3) (the LLM produces a patch)
4) `repo_change_to_branch` with `patch_b64` to push to a test branch (+ optional PR); see the sketch below
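Below is a minimal, hypothetical sketch of this flow as an HTTP client would drive it. The repo URL, the file path, and the exact `/v1/tools/call` request schema are assumptions for illustration, not part of this commit:

```python
# Sketch of the recommended flow, assuming the toolserver runs on
# localhost:8080 and that /v1/tools/call accepts a {"name", "arguments"} body
# (the exact request schema is not shown in this commit).
import base64
import httpx

BASE = "http://localhost:8080"

def call_tool(name: str, arguments: dict) -> dict:
    resp = httpx.post(f"{BASE}/v1/tools/call",
                      json={"name": name, "arguments": arguments}, timeout=120)
    resp.raise_for_status()
    return resp.json()

repo_url = "http://192.168.100.1:3000/team/example.git"  # hypothetical repo

# 1) find relevant files
hits = call_tool("repo_search",
                 {"repo_url": repo_url, "query": "login button label", "mode": "auto"})

# 2) read one of them (path is hypothetical)
content = call_tool("repo_read",
                    {"repo_url": repo_url, "path": "resources/views/auth/login.blade.php"})

# 3) the LLM produces a unified diff; 4) push it to a test branch
patch = "--- a/README.md\n+++ b/README.md\n@@ ...\n"  # placeholder diff
result = call_tool("repo_change_to_branch", {
    "repo_url": repo_url,
    "base_branch": "main",
    "patch_b64": base64.b64encode(patch.encode()).decode(),
    "commit_message": "Test change via toolserver",
})
print(result)
```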
README_TOOLSET_V2.md (new file, 33 lines)
@@ -0,0 +1,33 @@
# Toolserver repo toolset v2

This patch adds a clearer repo toolset, including a macro tool that performs the whole "modify repo and push to a test branch" process in a single tool call.

## New (recommended) tools

- `repo_open(repo_url, branch="main") -> {workspace_id,...}`
- `repo_search(query, mode="auto|grep|rag", workspace_id? / repo_url?, n_results=20, ...)`
- `repo_read(path, workspace_id? / repo_url?, start_line?, end_line?)`
- `repo_apply(workspace_id? / repo_url?, patch_b64|patch|files, dry_run=false)`
- `repo_push(workspace_id? / repo_url?, base_branch="main", new_branch?, branch_prefix="test", commit_message=...)`
- `repo_pr_create(repo_url, head_branch, base_branch="main", title, body?)`
- `repo_change_to_branch(repo_url, base_branch="main", patch_b64|patch|files, new_branch?/branch_prefix, commit_message, create_pr?, pr_title?, pr_body?)`

## Deprecated (still works)

- `repo_grep` -> alias for `repo_search(mode="grep")`
- `rag_index_repo` -> usually no longer needed; `repo_search` indexes automatically
- `rag_query` -> use `repo_search(mode="rag")`

## Important env vars

- `LLM_PROXY_URL=http://192.168.100.1:8081/v1/completions` (or `/v1/chat/completions` if your proxy offers it)
- `GITEA_TOKEN=...` (only needed for PR creation, and optionally for push auth if your repo_url contains no credentials)
- `GITEA_USER=oauth2` (optional; the default is oauth2 for token auth)
- `ALLOWED_GIT_HOSTS=10.25.138.40,192.168.100.1` (recommended; empty = everything allowed)

## Recommended flow for the LLM

1) `repo_search(mode="auto")` to find the relevant files
2) `repo_read` to read those files
3) (the LLM produces a patch)
4) `repo_change_to_branch` with `patch_b64` to push to a test branch (+ optional PR)
README_repo_push.txt (new file, 26 lines)
@@ -0,0 +1,26 @@
Toolserver patch + repo_push tool

This zip contains:
- app.py (toolserver-only + repo_push)
- llm_client.py

New tool: repo_push
- Creates a new branch (default: test/<timestamp>-<slug>) from base_branch
- Applies a unified diff (git apply); see the patch_b64 sketch below
- Commits + pushes to origin
- Optional: create_pr (best-effort) via the Gitea API

Important env vars:
- LLM_PROXY_URL="http://192.168.100.1:8081/v1/completions" (for tools that need the LLM)
- ALLOWED_GIT_HOSTS="192.168.100.1,localhost,127.0.0.1" (safety check; adjust as needed)

For push auth (HTTP):
- GITEA_USER="..." (or GIT_HTTP_USER)
- GITEA_TOKEN="..." (or GIT_HTTP_TOKEN)

Git identity for commits:
- GIT_AUTHOR_NAME="toolserver"
- GIT_AUTHOR_EMAIL="toolserver@local"

Usage:
- The tools become available automatically via /openapi/repo_push and /v1/tools.
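Since repo_push applies a unified diff, a patch passed as patch_b64 has to reach the server base64-encoded. A small sketch using only the standard library; the diff content and file name are hypothetical:

```python
# Sketch: building a patch_b64 argument for repo_push from a unified diff.
# The diff below is a made-up example; git apply expects this format.
import base64

patch = """\
--- a/resources/views/welcome.blade.php
+++ b/resources/views/welcome.blade.php
@@ -1,3 +1,3 @@
-<h1>Welkom</h1>
+<h1>Welcome</h1>
 <p>...</p>
"""
patch_b64 = base64.b64encode(patch.encode("utf-8")).decode("ascii")
print(patch_b64[:40], "...")
```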
README_toolserver_patch.txt (new file, 19 lines)
@@ -0,0 +1,19 @@
Toolserver patch

What does this change?
- app.py now runs as TOOLSERVER_ONLY=1 by default:
  * exposes: /openapi.json + /openapi/* + /v1/tools + /v1/tools/call + /healthz + /metrics (smoke test below)
  * removes all other routes (such as /v1/chat/completions) from the FastAPI router.
- All LLM calls made by tools/agents go through llm_client -> QueueManager -> LLM_PROXY_URL.

Config:
export TOOLSERVER_ONLY=1
export LLM_PROXY_URL="http://192.168.100.1:8081/v1/completions"
export LLM_MODEL="mistral-medium"  # or whatever your proxy expects

Start example:
uvicorn app:app --host 0.0.0.0 --port 8080

Note:
- Internal LLM streaming is disabled (tools are non-stream).
  If you later need streaming for agents, llm_client will have to be extended.
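A quick smoke test of toolserver-only mode, assuming the server is running locally on port 8080; it touches only the endpoints listed above, and the shape of the /v1/tools response is not specified in this commit:

```python
# Smoke test: verify the toolserver-only surface using /healthz and /v1/tools.
import httpx

base = "http://localhost:8080"
print(httpx.get(f"{base}/healthz", timeout=10).status_code)  # expect 200
tools = httpx.get(f"{base}/v1/tools", timeout=10).json()
print(type(tools))  # exact response shape is an implementation detail of app.py
```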
agent_repo.py (new file, 4887 lines)
File diff suppressed because it is too large.
llm_client.py (new file, 141 lines)
@@ -0,0 +1,141 @@
from __future__ import annotations
import os
import asyncio
import logging
from typing import List, Dict, Any, Optional

import httpx

from queue_helper import QueueManager

logger = logging.getLogger(__name__)

# -------------------------------------------------------------
# Config for the underlying LLM backend / proxy
# -------------------------------------------------------------
# You can set either of these:
# - LLM_PROXY_URL: full URL of an OpenAI-compatible endpoint (e.g. http://host:8081/v1/completions or /v1/chat/completions)
# - LLM_API_BASE : base URL (fallback). In that case we use /v1/chat/completions
LLM_PROXY_URL = (os.getenv("LLM_PROXY_URL") or "").strip()
LLM_API_BASE = os.getenv("LLM_API_BASE", "").strip() or "http://127.0.0.1:11434"
LLM_DEFAULT_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
LLM_REQUEST_TIMEOUT = float(os.getenv("LLM_REQUEST_TIMEOUT", "180"))

# This is set from app.py via init_llm_client(...)
LLM_QUEUE: QueueManager | None = None


def init_llm_client(queue: QueueManager) -> None:
    global LLM_QUEUE
    LLM_QUEUE = queue
    logger.info("llm_client: LLM_QUEUE attached via init_llm_client.")


def _resolve_llm_url() -> str:
    if LLM_PROXY_URL:
        return LLM_PROXY_URL.rstrip("/")
    # fallback: base -> chat completions
    return f"{LLM_API_BASE.rstrip('/')}/v1/chat/completions"


def _messages_to_prompt(messages: List[Dict[str, Any]]) -> str:
    # simple, robust prompt serialization for /v1/completions proxies
    parts: list[str] = []
    for m in messages:
        role = (m.get("role") or "user").upper()
        content = m.get("content") or ""
        parts.append(f"{role}: {content}")
    parts.append("ASSISTANT:")
    return "\n".join(parts)


def _chat_from_text(text: str) -> Dict[str, Any]:
    return {
        "object": "chat.completion",
        "choices": [{
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant", "content": text},
        }],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }


def _sync_model_infer(payload: Dict[str, Any]) -> Dict[str, Any]:
    url = _resolve_llm_url()
    try:
        with httpx.Client(timeout=LLM_REQUEST_TIMEOUT) as client:
            # detect endpoint type
            is_chat = "/chat/" in url or url.endswith("/chat/completions")
            if is_chat:
                resp = client.post(url, json=payload)
                resp.raise_for_status()
                return resp.json()
            # /v1/completions style
            messages = payload.get("messages") or []
            prompt = payload.get("prompt")
            if not prompt:
                prompt = _messages_to_prompt(messages)
            comp_payload: Dict[str, Any] = {
                "model": payload.get("model") or LLM_DEFAULT_MODEL,
                "prompt": prompt,
                "max_tokens": payload.get("max_tokens", 2048),
                "temperature": payload.get("temperature", 0.2),
                "top_p": payload.get("top_p", 0.9),
                "stream": False,
            }
            # pass-through extras if present
            for k in ("stop", "presence_penalty", "frequency_penalty"):
                if k in payload:
                    comp_payload[k] = payload[k]
            resp = client.post(url, json=comp_payload)
            resp.raise_for_status()
            data = resp.json()
            # normalize to chat.completion
            try:
                choice = (data.get("choices") or [{}])[0]
                txt = choice.get("text") or choice.get("message", {}).get("content") or ""
                return _chat_from_text(txt)
            except Exception:
                return _chat_from_text(str(data)[:2000])
    except Exception as exc:
        logger.exception("LLM backend call failed: %s", exc)
        return _chat_from_text(f"[LLM error] {exc}")


async def _llm_call(
    messages: List[Dict[str, str]],
    *,
    stream: bool = False,
    temperature: float = 0.2,
    top_p: float = 0.9,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None,
    **extra: Any,
) -> Dict[str, Any]:
    if stream:
        raise NotImplementedError("_llm_call(stream=True) is currently not supported.")

    if LLM_QUEUE is None:
        raise RuntimeError("LLM_QUEUE is not initialized. Call init_llm_client(...) from app.py")

    payload: Dict[str, Any] = {
        "model": model or LLM_DEFAULT_MODEL,
        "messages": messages,
        "stream": False,
        "temperature": float(temperature),
        "top_p": float(top_p),
    }
    if max_tokens is not None:
        payload["max_tokens"] = int(max_tokens)
    payload.update(extra)

    loop = asyncio.get_running_loop()
    try:
        response: Dict[str, Any] = await loop.run_in_executor(
            None, lambda: LLM_QUEUE.request_agent_sync(payload)
        )
        return response
    except Exception as exc:
        logger.exception("_llm_call via agent queue failed: %s", exc)
        return _chat_from_text(f"[LLM queue error] {exc}")
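For reference, a minimal sketch of how app.py might wire these pieces together at startup. The wiring shown is an assumption based on the comments above (app.py is not part of the visible diff):

```python
# Hypothetical wiring sketch: app.py builds the QueueManager around the
# synchronous backend call and hands it to llm_client at startup.
from queue_helper import QueueManager
import llm_client

queue = QueueManager(model_infer_fn=llm_client._sync_model_infer)
llm_client.init_llm_client(queue)

# Tools/agents can now await llm_client._llm_call([...]) and each request is
# serialized through the agent queue toward LLM_PROXY_URL.
```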
queue_helper.py (new file, 137 lines)
@@ -0,0 +1,137 @@
# -------------------------------------------------------------
# queue_helper.py - minimalistic thread-based queue manager
# -------------------------------------------------------------
import threading, queue, uuid, time
from typing import Callable, Any, Dict

# ------------------------------------------------------------------
# Configuration - adjust if needed
# ------------------------------------------------------------------
USER_MAX_QUEUE = 20      # max. waiting users
AGENT_MAX_QUEUE = 50     # max. waiting agents (silent)
UPDATE_INTERVAL = 10.0   # seconds between "position update" messages
WORKER_TIMEOUT = 30.0    # max. time an inference may take
# ------------------------------------------------------------------

class _Job:
    __slots__ = ("job_id", "payload", "callback", "created_at", "event", "result", "error")

    def __init__(self, payload: Dict, callback: Callable[[Dict], None]):
        self.job_id = str(uuid.uuid4())
        self.payload = payload
        self.callback = callback
        self.created_at = time.time()
        self.event = threading.Event()
        self.result = None
        self.error = None

    def set_success(self, answer: Dict):
        self.result = answer
        self.event.set()
        self.callback(answer)

    def set_error(self, exc: Exception):
        self.error = str(exc)
        self.event.set()
        self.callback({"error": self.error})


class QueueManager:
    """Manages the shared user/agent queues plus a single worker thread."""

    def __init__(self, model_infer_fn: Callable[[Dict], Dict]):
        self._infer_fn = model_infer_fn
        self._user_q = queue.Queue(maxsize=USER_MAX_QUEUE)
        self._agent_q = queue.Queue(maxsize=AGENT_MAX_QUEUE)
        self._shutdown = threading.Event()
        self._worker = threading.Thread(target=self._run_worker,
                                        daemon=True,
                                        name="LLM-worker")
        self._worker.start()

    # ---------- public API ----------
    def enqueue_user(
        self,
        payload: Dict,
        progress_cb: Callable[[Dict], None],
        *,
        notify_position: bool = False,
    ) -> tuple[str, int]:
        job = _Job(payload, progress_cb)
        try:
            self._user_q.put_nowait(job)
        except queue.Full:
            raise RuntimeError(f"User queue full (>={USER_MAX_QUEUE})")
        position = self._user_q.qsize()
        if notify_position:
            # start a separate notifier thread that periodically reports the queue position
            start_position_notifier(job, self._user_q)
        return job.job_id, position

    def enqueue_agent(self, payload: Dict, progress_cb: Callable[[Dict], None]) -> str:
        job = _Job(payload, progress_cb)
        try:
            self._agent_q.put_nowait(job)
        except queue.Full:
            raise RuntimeError(f"Agent queue full (>={AGENT_MAX_QUEUE})")
        return job.job_id

    # ---------- sync helper for agents/tools ----------
    def request_agent_sync(self, payload: Dict, timeout: float = WORKER_TIMEOUT) -> Dict:
        """
        Use this for internal calls (agents/tools).
        - The job is placed in the agent queue (lower priority than users).
        - We block until the worker finishes or the timeout expires.
        - NO queue notifications ("You are #...") are sent.
        """
        result_box: Dict[str, Any] = {}

        def _cb(msg: Dict):
            # only the final result is relevant for tools/agents
            result_box["answer"] = msg

        job = _Job(payload, _cb)
        try:
            self._agent_q.put_nowait(job)
        except queue.Full:
            raise RuntimeError(f"Agent queue full (>={AGENT_MAX_QUEUE})")

        ok = job.event.wait(timeout)
        if not ok:
            raise TimeoutError(f"LLM inference took longer than {timeout} seconds.")
        if job.error:
            raise RuntimeError(job.error)
        return result_box.get("answer") or {}

    # ---------- worker ----------
    def _run_worker(self):
        while not self._shutdown.is_set():
            job = self._pop_job(self._user_q) or self._pop_job(self._agent_q)
            if not job:
                time.sleep(0.1)
                continue
            try:
                answer = self._infer_fn(job.payload)
                job.set_success(answer)
            except Exception as exc:
                job.set_error(exc)

    def _pop_job(self, q: queue.Queue):
        try:
            return q.get_nowait()
        except queue.Empty:
            return None

    def stop(self):
        self._shutdown.set()
        self._worker.join(timeout=5)


def start_position_notifier(
    job: _Job,
    queue_ref: queue.Queue,
    interval: float = UPDATE_INTERVAL,
):
    """Sends a message with the current queue position every `interval` seconds."""
    def _notifier():
        # Stop as soon as the job event is set (success/error/timeout upstream)
        while not job.event.wait(interval):
            # Take a snapshot of the queue contents in a thread-safe way
            with queue_ref.mutex:
                snapshot = list(queue_ref.queue)
            try:
                pos = snapshot.index(job) + 1  # 1-based
            except ValueError:
                # Job is no longer in the queue -> no further updates needed
                break
            job.callback({"info": f"You are #{pos} in the queue. Please wait..."})
    t = threading.Thread(target=_notifier, daemon=True)
    t.start()
    return t
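A tiny, self-contained sketch of the queue in action with a fake inference function (nothing here touches a real backend). User jobs get priority because `_run_worker` polls the user queue before the agent queue:

```python
# Demo: QueueManager with a stand-in inference function.
import time
from queue_helper import QueueManager

def fake_infer(payload):
    time.sleep(0.05)  # simulate model latency
    return {"echo": payload.get("prompt")}

qm = QueueManager(model_infer_fn=fake_infer)
print(qm.request_agent_sync({"prompt": "hello"}, timeout=5))  # {'echo': 'hello'}
qm.stop()
```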
rebuild.sh (new executable file, 7 lines)
@@ -0,0 +1,7 @@
export HTTP_PROXY=http://192.168.100.2:8118
export HTTPS_PROXY=http://192.168.100.2:8118
export http_proxy=http://192.168.100.2:8118
export https_proxy=http://192.168.100.2:8118
docker stop toolserver 2>/dev/null || true
docker rm -f toolserver 2>/dev/null || true
docker build -t toolserver . --build-arg http_proxy=http://192.168.100.2:8118 --build-arg https_proxy=http://192.168.100.2:8118
requirements.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
fastapi
uvicorn[standard]
requests
python-docx
pypdf
smart_rag.py (new file, 604 lines)
@@ -0,0 +1,604 @@
# smart_rag.py
# Small utility layer for intent + hybrid retrieval + context assembly.
from __future__ import annotations
import os, re, json, math, hashlib
from typing import List, Dict, Tuple
from collections import defaultdict


def _decamel(s: str) -> str:
    s = re.sub(r"([a-z])([A-Z])", r"\1 \2", s)
    s = s.replace("_", " ")
    return re.sub(r"\s+", " ", s).strip()

def _symbol_guess(q: str) -> list[str]:
    # take the longest 'code-like' tokens as symbol candidates
    toks = re.findall(r"[A-Za-z_][A-Za-z0-9_]{2,}", q)
    toks.sort(key=len, reverse=True)
    return toks[:2]

def _simple_variants(q: str, max_k: int = 3) -> list[str]:
    base = [q]
    lo = q.lower().strip()
    if lo and lo not in base:
        base.append(lo)
    dec = _decamel(q)
    if dec and dec.lower() != lo and dec not in base:
        base.append(dec)
    syms = _symbol_guess(q)
    for s in syms:
        v = s.replace("_", " ")
        if v not in base:
            base.append(v)
        v2 = s  # raw symbol
        if v2 not in base:
            base.append(v2)
    # cap
    return base[: max(1, min(len(base), max_k))]


# --- Query routing + RRF fuse ---

def _route_query_buckets(q: str) -> list[dict]:
    """Very light router: returns a list of subqueries with optional path filters and a boost."""
    lo = (q or "").lower()
    buckets = []

    # Queue/Jobs/Event pipeline (Laravel)
    if any(w in lo for w in ["job", "queue", "listener", "event", "dispatch"]):
        buckets.append({"q": q, "path_contains": "app/Jobs", "boost": 1.18})
        buckets.append({"q": q, "path_contains": "app/Listeners", "boost": 1.12})
        buckets.append({"q": q, "path_contains": "app/Events", "boost": 1.10})
    # Models / Migrations
    if any(w in lo for w in ["model", "eloquent", "scope", "attribute"]):
        buckets.append({"q": q, "path_contains": "app/Models", "boost": 1.12})
    if any(w in lo for w in ["migration", "schema", "table", "column"]):
        buckets.append({"q": q, "path_contains": "database/migrations", "boost": 1.08})

    # Laravel/Blade/UI
    if any(w in lo for w in ["blade", "view", "template", "button", "placeholder", "label"]):
        buckets.append({"q": q, "path_contains": "resources/views", "boost": 1.2})
    # Routes/controllers
    if any(w in lo for w in ["route", "controller", "middleware", "api", "web.php", "controller@"]):
        buckets.append({"q": q, "path_contains": "routes", "boost": 1.15})
        buckets.append({"q": q, "path_contains": "app/Http/Controllers", "boost": 1.2})
    # Config/ENV
    if any(w in lo for w in ["env", "config", "database", "queue", "cache"]):
        buckets.append({"q": q, "path_contains": "config", "boost": 1.15})
        buckets.append({"q": q, "path_contains": ".env", "boost": 1.1})
    # Docs/README
    if any(w in lo for w in ["readme", "install", "setup", "document", "usage"]):
        buckets.append({"q": q, "path_contains": "README", "boost": 1.05})
        buckets.append({"q": q, "path_contains": "docs", "boost": 1.05})

    # Fallback: generic
    buckets.append({"q": q, "path_contains": None, "boost": 1.0})
    # dedup on (q, path_contains)
    seen = set(); out = []
    for b in buckets:
        key = (b["q"], b["path_contains"])
        if key in seen:
            continue
        seen.add(key); out.append(b)
    return out

def rrf_fuse_ranked_lists(ranked_lists: list[list[dict]], k: int = 60) -> list[dict]:
    """
    ranked_lists: e.g. [[{key,score,item},...], ...] (each already sorted per channel/bucket)
    Returns: one merged list (dicts) with a 'score_fused' field.
    """
    # build mapping
    pos_maps: list[dict] = []
    for rl in ranked_lists or []:
        pos = {}
        for i, it in enumerate(rl, 1):
            meta = it.get("metadata") or {}
            key = f"{meta.get('repo','')}::{meta.get('path','')}::{meta.get('chunk_index','')}"
            pos[key] = i
        pos_maps.append(pos)

    fused: dict[str, float] = {}
    ref_item: dict[str, dict] = {}
    for idx, rl in enumerate(ranked_lists or []):
        pos_map = pos_maps[idx]
        for it in rl:
            meta = it.get("metadata") or {}
            key = f"{meta.get('repo','')}::{meta.get('path','')}::{meta.get('chunk_index','')}"
            r = pos_map.get(key, 10**9)
            fused[key] = fused.get(key, 0.0) + 1.0 / (k + r)
            ref_item[key] = it

    out = []
    for key, f in fused.items():
        it = dict(ref_item[key])
        it["score_fused"] = f
        out.append(it)
    out.sort(key=lambda x: x.get("score_fused", 0.0), reverse=True)
    return out


def _rrf_from_ranklists(ranklists: List[List[str]], k: int = int(os.getenv("RRF_K", "60"))) -> Dict[str, float]:
    """
    Reciprocal Rank Fusion: takes ordered lists (best first) and
    returns merged scores {key: rrf_score}.
    """
    acc = defaultdict(float)
    for lst in ranklists:
        for i, key in enumerate(lst):
            acc[key] += 1.0 / (k + i + 1)
    return acc

def _path_prior(path: str) -> float:
    """
    Lightweight prior per path, on a 0..1 scale. Laravel paths get a bonus,
    generic code dirs a small bonus too; binary/test/asset paths get less.
    """
    p = (path or "").replace("\\", "/").lower()
    bonus = 0.0
    # Laravel priors
    if p.startswith("routes/"): bonus += 0.35
    if p.startswith("app/http/controllers/"): bonus += 0.30
    if p.startswith("resources/views/"): bonus += 0.25
    if p.endswith(".blade.php"): bonus += 0.15
    # Generic priors
    if p.startswith(("src/", "app/", "lib/", "pages/", "components/")): bonus += 0.12
    if p.endswith((".php",".ts",".tsx",".js",".jsx",".py",".go",".rb",".java",".cs",".vue",".html",".md")):
        bonus += 0.05
    # Demote obvious low-signal
    if "/tests/" in p or p.startswith(("tests/", "test/")): bonus -= 0.10
    if p.endswith((".lock",".map",".min.js",".min.css")): bonus -= 0.10
    return max(0.0, min(1.0, bonus))


def _safe_json_loads(s: str):
    if not s:
        return None
    t = s.strip()
    if t.startswith("```"):
        t = re.sub(r"^```(?:json)?", "", t, count=1, flags=re.IGNORECASE).strip()
    if t.endswith("```"):
        t = t[:-3].strip()
    try:
        return json.loads(t)
    except Exception:
        return None


def _tok(s: str) -> List[str]:
    return re.findall(r"[A-Za-z0-9_]+", s.lower())

def _jaccard(a: str, b: str) -> float:
    A, B = set(_tok(a)), set(_tok(b))
    if not A or not B: return 0.0
    # very small set caps (emergency brake against pathological inputs)
    if len(B) > 8000:
        # reduce B with stable (deterministic) sampling based on sha1
        def _stable_byte(tok: str) -> int:
            return hashlib.sha1(tok.encode("utf-8")).digest()[0]
        B = {t for t in B if _stable_byte(t) < 64}  # ~25% sample
    return len(A & B) / max(1, len(A | B))


def _normalize(xs: List[float]) -> List[float]:
    if not xs: return xs
    lo, hi = min(xs), max(xs)
    if hi <= lo: return [0.0]*len(xs)
    return [(x - lo) / (hi - lo) for x in xs]

async def enrich_intent(llm_call_fn, messages: List[Dict]) -> Dict:
    """
    Converts an unstructured question into a compact plan.
    Fields: task, constraints, file_hints, keywords, acceptance, ask (optional).
    """
    user_text = ""
    for m in reversed(messages):
        if m.get("role") == "user":
            user_text = m.get("content","").strip()
            break

    sys = ("You restructure a developer question into JSON. "
           "Return ONLY JSON, no explanation.")
    usr = (
        "Convert the essence of the question into this schema:\n"
        "{"
        "\"task\": str, "
        "\"constraints\": [str,...], "
        "\"file_hints\": [str,...], "
        "\"keywords\": [str,...], "
        "\"acceptance\": [str,...], "
        "\"ask\": str|null "
        "}\n\n"
        f"Question:\n{user_text}"
    )
    try:
        resp = await llm_call_fn(
            [{"role":"system","content":sys},{"role":"user","content":usr}],
            stream=False, temperature=0.1, top_p=1.0, max_tokens=512
        )
        raw = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","{}")
        spec = _safe_json_loads(raw) or {"task": user_text, "constraints": [], "file_hints": [], "keywords": [], "acceptance": [], "ask": None}
    except Exception:
        # Safe defaults
        spec = {
            "task": user_text,
            "constraints": [],
            "file_hints": [],
            "keywords": [],
            "acceptance": [],
            "ask": None
        }
    # Minimalistic fallback sanity checks
    for k in ("constraints","file_hints","keywords","acceptance"):
        if not isinstance(spec.get(k), list):
            spec[k] = []
    if not isinstance(spec.get("task"), str):
        spec["task"] = user_text
    if spec.get("ask") is not None and not isinstance(spec["ask"], str):
        spec["ask"] = None
    return spec

async def expand_queries(llm_call_fn, q: str, k: int = 3) -> List[str]:
    if str(os.getenv("RAG_EXPAND_QUERIES","1")).lower() in ("0","false"):
        return [q]
    sys = "Return 3-4 short NL/EN search variants as a JSON array. No explanation."
    usr = f"Source question:\n{q}\n\nJSON array only."
    try:
        resp = await llm_call_fn(
            [{"role":"system","content":sys},{"role":"user","content":usr}],
            stream=False, temperature=0.2, top_p=0.9, max_tokens=240
        )
        raw = (resp.get("choices",[{}])[0].get("message",{}) or {}).get("content","[]")
        arr = _safe_json_loads(raw) or []
        arr = [str(x).strip() for x in arr if str(x).strip()]
        seen = {q.lower()}
        base = [q]
        for v in arr:
            lv = v.lower()
            if lv not in seen:
                base.append(v); seen.add(lv)
        return base[: max(1, min(len(base), k + 1))]
    except Exception:
        return [q]

def _sim_from_chroma_distance(d: float|None) -> float:
    """
    Convert a (Chroma) distance to a similarity in [0,1]; defensive against None/NaN/negative.
    """
    if d is None:
        return 0.0
    try:
        dv = float(d)
    except Exception:
        dv = 0.0
    if not math.isfinite(dv) or dv < 0:
        return 0.0
    return 1.0 / (1.0 + dv)


async def hybrid_retrieve(
    rag_query_internal_fn,
    query: str,
    *,
    repo: str|None = None,
    profile: str|None = None,
    path_contains: str|None = None,
    per_query_k: int = 30,
    n_results: int = 8,
    alpha: float = 0.6,
    collection_name: str = "code_docs",
    llm_call_fn=None,
) -> List[Dict]:
    """
    Multi-variant retrieval with RRF fusion + path prior.
    Returns: list of dict(document, metadata, score)
    """

    # Optional query routing + RRF
    use_route = str(os.getenv("RAG_ROUTE", "1")).lower() not in ("0", "false")
    use_rrf = str(os.getenv("RAG_RRF", "1")).lower() not in ("0", "false")
    # Optional mini multi-query expansion (on by default)
    use_expand = str(os.getenv("RAG_MULTI_EXPAND", "1")).lower() in ("1","true","yes")
    k_variants = max(1, int(os.getenv("RAG_MULTI_K", "3")))
    per_query_k = max(1, int(per_query_k))
    n_results = max(1, int(n_results))
    if not (query or "").strip():
        return []
    # Multi-query variants:
    if use_expand:
        if llm_call_fn is not None:
            variants = await expand_queries(llm_call_fn, query, k=k_variants)
        else:
            variants = _simple_variants(query, max_k=k_variants)
    else:
        variants = [query]

    ranked_lists = []  # for RRF (all variants/buckets)
    for qv in variants:
        if use_route:
            buckets = _route_query_buckets(qv)
            for b in buckets:
                # combine the global path_contains hint with the bucket-specific filter
                pc = b.get("path_contains")
                if path_contains and not pc:
                    pc = path_contains
                res = await rag_query_internal_fn(
                    query=b["q"], n_results=per_query_k,
                    collection_name=collection_name,
                    repo=repo, path_contains=pc, profile=profile
                )
                lst = []
                for item in (res or {}).get("results", []):
                    # distance may be missing on older backends; defensive cast
                    dist = item.get("distance", None)
                    try:
                        dist = float(dist) if dist is not None else None
                    except Exception:
                        dist = None
                    emb_sim = _sim_from_chroma_distance(dist) * float(b.get("boost",1.0))
                    lst.append({**item, "emb_sim_routed": emb_sim})
                lst.sort(key=lambda x: x.get("emb_sim_routed",0.0), reverse=True)
                # Let RRF see enough candidates (do not cut off too early):
                ranked_lists.append(lst[:per_query_k])
        else:
            # no routing: query per variant directly (consistent scoring/sorting)
            res = await rag_query_internal_fn(
                query=qv, n_results=per_query_k,
                collection_name=collection_name,
                repo=repo, path_contains=path_contains, profile=profile
            )
            lst = []
            for item in (res or {}).get("results", []):
                dist = item.get("distance", None)
                try:
                    dist = float(dist) if dist is not None else None
                except Exception:
                    dist = None
                emb_sim = _sim_from_chroma_distance(dist)
                lst.append({**item, "emb_sim_routed": emb_sim})
            lst.sort(key=lambda x: x.get("emb_sim_routed", 0.0), reverse=True)
            ranked_lists.append(lst[:per_query_k])

    # If RRF is enabled: fuse now
    items = rrf_fuse_ranked_lists(ranked_lists) if use_rrf else [x for rl in ranked_lists for x in rl]

    if not items:
        return []

    # Simple lexical score (over the merged set):
    # take the BEST of all variants instead of only the main query.
    bm: List[float] = []
    if variants and len(variants) > 1:
        for it in items:
            doc = it.get("document", "") or ""
            bm.append(max((_jaccard(v, doc) for v in variants), default=_jaccard(query, doc)))
    else:
        bm = [_jaccard(query, it.get("document","")) for it in items]
    bm_norm = _normalize(bm)

    out = []
    for i, it in enumerate(items):
        # Better fallback: use routed emb sim -> plain emb_sim -> distance
        emb = (
            float(it.get("emb_sim_routed", 0.0))
            or float(it.get("emb_sim", 0.0))
            or _sim_from_chroma_distance(it.get("distance"))
        )
        score = alpha * emb + (1.0 - alpha) * bm_norm[i]
        meta = (it.get("metadata") or {})
        path = meta.get("path","") or ""
        # -- optional: path prior + symbol boost via env --
        pp_w = float(os.getenv("RAG_PATH_PRIOR_W", "0.08"))
        if pp_w > 0.0:
            score += pp_w * _path_prior(path)
        sym_w = float(os.getenv("RAG_SYM_BOOST", "0.04"))
        if sym_w > 0.0:
            syms_raw = meta.get("symbols")
            if isinstance(syms_raw, str):
                syms = [s.strip().lower() for s in syms_raw.split(",") if s.strip()]
            elif isinstance(syms_raw, list):
                syms = [str(s).strip().lower() for s in syms_raw if str(s).strip()]
            else:
                syms = []
            if syms:
                q_terms = set(_tok(query))
                if q_terms & set(syms):
                    score += sym_w
        out.append({**it, "score": float(score)})

    out.sort(key=lambda x: x["score"], reverse=True)
    return out[:int(n_results)]

def assemble_context(chunks: List[Dict], *, max_chars: int = 24000) -> Tuple[str, float]:
    """
    Budgeted stitching:
    - group per path
    - per path: take 1-3 fragments (ordered by chunk_index when available)
    - divide the char budget across paths, weighted toward higher scores
    - keep the Laravel stitching
    Returns: (context_text, top_score)
    """
    if not chunks:
        return "", 0.0

    # 1) Group per path and collect scores + (optional) chunk_index
    by_path: Dict[str, List[Dict]] = {}
    top_score = 0.0
    for r in chunks:
        meta = (r.get("metadata") or {})
        path = meta.get("path","") or ""
        r["_chunk_index"] = meta.get("chunk_index")
        r["_score"] = float(r.get("score", 0.0) or 0.0)
        top_score = max(top_score, r["_score"])
        by_path.setdefault(path, []).append(r)

    # 2) Per path: sort by chunk_index (when available), otherwise by score; cap at N pieces
    def _sort_key(x):
        ci = x.get("_chunk_index")
        return (0, int(ci)) if isinstance(ci, int) or (isinstance(ci, str) and str(ci).isdigit()) else (1, -x["_score"])

    path_items = []
    max_pieces = int(os.getenv("CTX_PIECES_PER_PATH_CAP", "3"))
    for p, lst in by_path.items():
        lst_sorted = sorted(lst, key=_sort_key)
        path_items.append({
            "path": p,
            "best_score": max(x["_score"] for x in lst_sorted),
            "pieces": lst_sorted[:max(1, max_pieces)],  # cap per file
        })

    # 3) Sort paths by best_score and compute the budget split (softmax-like, but bounded)
    path_items.sort(key=lambda t: t["best_score"], reverse=True)
    # clamp scores to [0,1] for a more stable allocation
    scores = [min(1.0, max(0.0, t["best_score"])) for t in path_items]
    # softmax-lite: normalize exp(score*beta); beta kept lowish so the split is not too sharp
    beta = float(os.getenv("CTX_ALLOC_BETA", "2.2"))
    w = [math.exp(beta * s) for s in scores]
    S = max(1e-9, sum(w))
    weights = [x / S for x in w]

    # 4) Build a quick path -> full body lookup (for Laravel stitching)
    by_path_first_body: Dict[str, str] = {}
    for t in path_items:
        doc0 = (t["pieces"][0].get("document") or "").strip()
        by_path_first_body[t["path"]] = doc0

    # 5) Render with a budget per path
    out = []
    used = 0
    for t, w_i in zip(path_items, weights):
        p = t["path"]
        # minimum & maximum budget per path (chars)
        min_chars = int(os.getenv("CTX_ALLOC_MIN_PER_PATH", "1200"))
        max_chars_path = int(os.getenv("CTX_ALLOC_MAX_PER_PATH", "6000"))
        alloc = min(max(min_chars, int(max_chars * w_i)), max_chars_path)

        # stitch 1..3 pieces of this path within alloc
        header = f"### {p} (score={t['best_score']:.3f})"
        block_buf = [header]
        remaining = max(0, alloc - len(header) - 1)

        for piece in t["pieces"]:
            body = (piece.get("document") or "").strip()
            # do not cut mid-line: take up to remaining and roll back to the last newline
            if remaining <= 0:
                break
            if len(body) > remaining:
                cut = body[:remaining]
                nl = cut.rfind("\n")
                if nl > 300:  # do not leave it too short
                    body = cut[:nl] + "\n…"
                else:
                    body = cut + "…"
            block_buf.append(body)
            remaining -= len(body)
            if remaining <= 300:  # keep some room for stitching
                break

        block = "\n".join(block_buf)

        # --- Laravel mini-stitch as before, but budget-aware
        stitched = []
        if p in ("routes/web.php", "routes/api.php"):
            for ctrl_path, _meth in _laravel_pairs_from_route_text(by_path_first_body.get(p,"")):
                if ctrl_path in by_path_first_body and remaining > 400:
                    snippet = by_path_first_body[ctrl_path][:min(400, remaining)]
                    stitched.append(f"\n### {ctrl_path} (stitch)\n{snippet}")
                    remaining -= len(snippet)
        if p.startswith("app/Http/Controllers/"):
            for vpath in _laravel_guess_view_paths_from_text(by_path_first_body.get(p,"")):
                if vpath in by_path_first_body and remaining > 400:
                    snippet = by_path_first_body[vpath][:min(400, remaining)]
                    stitched.append(f"\n### {vpath} (stitch)\n{snippet}")
                    remaining -= len(snippet)

        if stitched:
            block += "\n" + "\n".join(stitched)

        # If the full block no longer fits, trim it cleanly instead of dropping everything
        remaining_total = max_chars - used
        if remaining_total <= 0:
            break
        if len(block) > remaining_total:
            # Make sure we do not cut in the middle of a markdown header
            trimmed = block[:max(0, remaining_total - 1)]
            block = trimmed + "…"
            out.append(block)
            used = max_chars
            break
        else:
            out.append(block)
            used += len(block)

        # stop early if the budget is nearly exhausted
        if max_chars - used < 800:
            break

    return ("\n\n".join(out), float(top_score))

# --- Laravel route/controller/view helpers (lightweight, cycle-safe) ---

def _laravel_pairs_from_route_text(route_text: str):
    """
    Parse routes/web.php|api.php text and yield (controller_path, method) guesses.
    Supports:
    - 'Controller@method'
    - FQCN such as App\\Http\\Controllers\\Foo\\BarController::class
    """
    out = []

    # 1) 'Controller@method'
    for m in re.finditer(r"['\"]([A-Za-z0-9_\\]+)@([A-Za-z0-9_]+)['\"]", route_text):
        fq = m.group(1)
        method = m.group(2)
        ctrl = fq.replace("\\\\","/").replace("\\","/")
        name = ctrl.split("/")[-1]
        guess = f"app/Http/Controllers/{ctrl}.php"
        alt = f"app/Http/Controllers/{name}.php"
        out.append((guess, method))
        out.append((alt, method))

    # 2) FQCN ::class
    for m in re.finditer(r"([A-Za-z_][A-Za-z0-9_\\]+)\s*::\s*class", route_text):
        fq = m.group(1)
        ctrl = fq.replace("\\\\","/").replace("\\","/")
        name = ctrl.split("/")[-1]
        guess = f"app/Http/Controllers/{ctrl}.php"
        alt = f"app/Http/Controllers/{name}.php"
        out.append((guess, None))
        out.append((alt, None))

    # dedupe, preserve order
    seen = set(); dedup = []
    for p in out:
        if p not in seen:
            seen.add(p); dedup.append(p)
    return dedup


def _laravel_guess_view_paths_from_text(controller_text: str):
    """
    Parse simple 'return view("foo.bar")' patterns -> resources/views/foo/bar.blade.php
    """
    out = []
    for m in re.finditer(r"view\(\s*['\"]([A-Za-z0-9_.\/-]+)['\"]\s*\)", controller_text):
        view = m.group(1).strip().strip(".")
        # 'foo.bar' or 'foo/bar'
        path = view.replace(".", "/")
        out.append(f"resources/views/{path}.blade.php")
    # dedupe
    seen = set(); dedup = []
    for p in out:
        if p not in seen:
            seen.add(p); dedup.append(p)
    return dedup

# Public API surface
__all__ = [
    "enrich_intent",
    "expand_queries",
    "hybrid_retrieve",
    "assemble_context",
    "_laravel_pairs_from_route_text",
    "_laravel_guess_view_paths_from_text",
]
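To make the Reciprocal Rank Fusion formula concrete, a small self-contained example (the keys are made up; with k=60, a document ranked first in both lists scores 2/61 ≈ 0.0328):

```python
# Worked RRF example for _rrf_from_ranklists: rank i (0-based) in a list
# contributes 1/(k + i + 1) to that key's fused score.
from smart_rag import _rrf_from_ranklists

ranklists = [
    ["routes/web.php", "app/Http/Controllers/HomeController.php"],
    ["routes/web.php", "config/app.php", "app/Http/Controllers/HomeController.php"],
]
scores = _rrf_from_ranklists(ranklists, k=60)
for key, s in sorted(scores.items(), key=lambda kv: -kv[1]):
    print(f"{s:.4f}  {key}")
# routes/web.php wins (2/61); HomeController follows (1/62 + 1/63).
```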
toolserver.sh (new executable file, 1 line)
@@ -0,0 +1 @@
docker run -d --rm --name toolserver --network host \
  -v /opt/SentenceTransformer:/opt/sentence-transformers \
  -v /opt/piper/voices:/voices:ro \
  -e LLM_TOOL_RUNNER=bridge \
  -e LLM_UPSTREAMS="http://localhost:8000/v1/chat/completions,http://localhost:8001/v1/chat/completions" \
  -e LLM_MAX_CONCURRENCY=2 \
  -e REPO_AGENT_SMART=1 \
  -e RAG_EXPAND_QUERIES=1 -e RAG_EXPAND_K=3 -e RAG_PER_QUERY_K=30 -e RAG_N_RESULT=8 -e RAG_EMB_WEIGHT=0.6 \
  -e REPO_AGENT_CONTEXT_CHARS=24000 -e REPO_AGENT_ASK_CLARIFY=1 -e REPO_AGENT_ASK_THRESHOLD=0.35 \
  -e PIPER_BIN=/usr/local/bin/piper -e PIPER_VOICE=/voices/nl_NL-mls-medium.onnx.gz \
  -e LLM_WINDOWING_ENABLE=0 -e LLM_CONTEXT_TOKENS=42000 -e LLM_RESPONSE_RESERVE=1024 \
  -e LLM_AUTO_CONTINUES=0 -e LLM_FUNCTION_CALLING_MODE=auto \
  -e LLM_URL="http://localhost:8000/v1/chat/completions" \
  -e NO_PROXY="localhost,127.0.0.1,::1,host.docker.internal,192.168.100.1,192.168.100.2" \
  -e HTTP_PROXY=http://192.168.100.2:8118 -e HTTPS_PROXY=http://192.168.100.2:8118 \
  -e RAG_TORCH_THREADS=6 -e OMP_NUM_THREADS=6 -e MKL_NUM_THREADS=6 -e OPENBLAS_NUM_THREADS=6 -e NUMEXPR_NUM_THREADS=6 \
  -e LLM_READ_TIMEOUT=3600 \
  -e MEILI_URL=http://localhost:7700 -e MEILI_KEY=0xipOmfgi_zMgdFplSdv7L8mlx0RPMQCNxVTNJc54lQ \
  --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 \
  -e FORCE_ALL_TOOLS=0 -e AUTO_CONTINUE=0 \
  -e LLM_PROXY_URL="http://192.168.100.1:8081/v1/chat/completions" \
  -e ALLOWED_GIT_HOSTS="192.168.100.1,localhost,127.0.0.1,10.25.138.40" \
  -e STREAM_PREFER_DIRECT=1 \
  toolserver
toolserver_toolset_v2.zip (new binary file)
Binary file not shown.
web_search.py (new file, 333 lines)
@ -0,0 +1,333 @@
|
|||||||
|
import os
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from requests import get
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import concurrent.futures
|
||||||
|
from html.parser import HTMLParser
|
||||||
|
from urllib.parse import urlparse, urljoin
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
import asyncio
|
||||||
|
from typing import Any, List, Optional
|
||||||
|
import httpx
|
||||||
|
logger = logging.getLogger("web_search")
|
||||||
|
|
||||||
|
async def call_improve_web_query(text: str, max_chars: int = 20000, objective: str = None, style: str = None):
|
||||||
|
url = "http://localhost:8080/improve_web_query"
|
||||||
|
params = {"text": text, "max_chars": max_chars}
|
||||||
|
if objective:
|
||||||
|
params["objective"] = objective
|
||||||
|
if style:
|
||||||
|
params["style"] = style
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
response = await client.get(url, params=params)
|
||||||
|
return response.json()
|
||||||
|
|
||||||
|
class HelpFunctions:
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def get_base_url(self, url: str) -> str:
|
||||||
|
parsed_url = urlparse(url)
|
||||||
|
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
||||||
|
return base_url
|
||||||
|
|
||||||
|
def generate_excerpt(self, content: str, max_length: int = 200) -> str:
|
||||||
|
return content[:max_length] + "..." if len(content) > max_length else content
|
||||||
|
|
||||||
|
def format_text(self, original_text: str) -> str:
|
||||||
|
soup = BeautifulSoup(original_text, "html.parser")
|
||||||
|
formatted_text = soup.get_text(separator=" ", strip=True)
|
||||||
|
formatted_text = unicodedata.normalize("NFKC", formatted_text)
|
||||||
|
formatted_text = re.sub(r"\s+", " ", formatted_text)
|
||||||
|
formatted_text = formatted_text.strip()
|
||||||
|
formatted_text = self.remove_emojis(formatted_text)
|
||||||
|
return formatted_text
|
||||||
|
|
||||||
|
def remove_emojis(self, text: str) -> str:
|
||||||
|
return "".join(c for c in text if not unicodedata.category(c).startswith("So"))
|
||||||
|
|
||||||
|
def process_search_result(self, result: dict, valves: Any) -> Optional[dict]:
|
||||||
|
title_site = self.remove_emojis(result["title"])
|
||||||
|
url_site = result["url"]
|
||||||
|
snippet = result.get("content", "")
|
||||||
|
|
||||||
|
# Check if the website is in the ignored list, but only if IGNORED_WEBSITES is not empty
|
||||||
|
if valves.IGNORED_WEBSITES:
|
||||||
|
base_url = self.get_base_url(url_site)
|
||||||
|
if any(
|
||||||
|
ignored_site.strip() in base_url
|
||||||
|
for ignored_site in valves.IGNORED_WEBSITES.split(",")
|
||||||
|
):
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
response_site = requests.get(url_site, timeout=20)
|
||||||
|
response_site.raise_for_status()
|
||||||
|
html_content = response_site.text
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html_content, "html.parser")
|
||||||
|
content_site = self.format_text(soup.get_text(separator=" ", strip=True))
|
||||||
|
|
||||||
|
truncated_content = self.truncate_to_n_words(
|
||||||
|
content_site, valves.PAGE_CONTENT_WORDS_LIMIT
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"title": title_site,
|
||||||
|
"url": url_site,
|
||||||
|
"content": truncated_content,
|
||||||
|
"snippet": self.remove_emojis(snippet),
|
||||||
|
}
|
||||||
|
|
||||||
|
except requests.exceptions.RequestException as e:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def truncate_to_n_words(self, text: str, token_limit: int) -> str:
|
||||||
|
tokens = text.split()
|
||||||
|
truncated_tokens = tokens[:token_limit]
|
||||||
|
return " ".join(truncated_tokens)
|
||||||
|
|
||||||
|
class EventEmitter:
|
||||||
|
def __init__(self, event_emitter: Optional[Any] = None):
|
||||||
|
self.event_emitter = event_emitter
|
||||||
|
|
||||||
|
async def emit(self, description: str = "Unknown State", status: str = "in_progress", done: bool = False):
|
||||||
|
if self.event_emitter:
|
||||||
|
await self.event_emitter(
|
||||||
|
{
|
||||||
|
"type": "status",
|
||||||
|
"data": {
|
||||||
|
"status": status,
|
||||||
|
"description": description,
|
||||||
|
"done": done,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
class Tools:
    class Valves(BaseModel):
        SEARXNG_ENGINE_API_BASE_URL: str = Field(
            default="http://192.168.100.1:8899/search",
            description="The base URL for the SearXNG search engine",
        )
        IGNORED_WEBSITES: str = Field(
            default="",
            description="Comma-separated list of websites to ignore",
        )
        RETURNED_SCRAPPED_PAGES_NO: int = Field(
            default=3,
            description="The number of search engine results to parse",
        )
        SCRAPPED_PAGES_NO: int = Field(
            default=5,
            description="Total number of pages scraped; ideally greater than the number of returned pages",
        )
        PAGE_CONTENT_WORDS_LIMIT: int = Field(
            default=5000,
            description="Word limit for the content of each page",
        )
        CITATION_LINKS: bool = Field(
            default=False,
            description="If True, send custom citations with links",
        )

    def __init__(self):
        self.valves = self.Valves()
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }

    async def search_web(self, query: str) -> str:
        """
        Search the web using SearXNG and get the content of the relevant pages.
        :param query: Web query used in the search engine.
        :return: The content of the pages in JSON format.
        """
        logger.info("in query: %s", query)
        try:
            # query = asyncio.run(call_improve_web_query(query))  # optional query rewriting, currently disabled
            pass
        except Exception as e:
            logger.error("ERROR: %s", str(e))
        logger.info("out query: %s", query)

        # Status events are currently disabled; Open WebUI would normally inject
        # __event_emitter__ into the tool call.
        __event_emitter__ = None
        functions = HelpFunctions()
        emitter = EventEmitter(__event_emitter__)

        await emitter.emit(description=f"Initiating web search for: {query}")

        search_engine_url = self.valves.SEARXNG_ENGINE_API_BASE_URL

        # Ensure RETURNED_SCRAPPED_PAGES_NO does not exceed SCRAPPED_PAGES_NO
        if self.valves.RETURNED_SCRAPPED_PAGES_NO > self.valves.SCRAPPED_PAGES_NO:
            self.valves.RETURNED_SCRAPPED_PAGES_NO = self.valves.SCRAPPED_PAGES_NO

        params = {
            "q": query,
            "format": "json",
            "number_of_results": self.valves.RETURNED_SCRAPPED_PAGES_NO,
        }

        try:
            await emitter.emit(description="Sending request to search engine")
            resp = requests.get(
                search_engine_url, params=params, headers=self.headers, timeout=120
            )
            logger.info("query: %s", query)
            logger.info("REQUEST URL: %s", resp.url)

            resp.raise_for_status()
            data = resp.json()

            results = data.get("results", [])
            limited_results = results[: self.valves.SCRAPPED_PAGES_NO]
            await emitter.emit(
                description=f"Retrieved {len(limited_results)} search results"
            )
        except requests.exceptions.RequestException as e:
            await emitter.emit(
                status="error",
                description=f"Error during search: {str(e)}",
                done=True,
            )
            return json.dumps({"error": str(e)})

        results_json = []
        if limited_results:
            await emitter.emit(description="Processing search results")

            # Scrape the result pages concurrently; keep only results that serialize to JSON
            with concurrent.futures.ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(
                        functions.process_search_result, result, self.valves
                    )
                    for result in limited_results
                ]
                for future in concurrent.futures.as_completed(futures):
                    result_json = future.result()
                    if result_json:
                        try:
                            json.dumps(result_json)
                            results_json.append(result_json)
                        except (TypeError, ValueError):
                            continue
                    if len(results_json) >= self.valves.RETURNED_SCRAPPED_PAGES_NO:
                        break

            results_json = results_json[: self.valves.RETURNED_SCRAPPED_PAGES_NO]

            if self.valves.CITATION_LINKS and __event_emitter__:
                for result in results_json:
                    await __event_emitter__(
                        {
                            "type": "citation",
                            "data": {
                                "document": [result["content"]],
                                "metadata": [{"source": result["url"]}],
                                "source": {"name": result["title"]},
                            },
                        }
                    )

        await emitter.emit(
            status="complete",
            description=f"Web search completed. Retrieved content from {len(results_json)} pages",
            done=True,
        )

        return json.dumps(results_json, ensure_ascii=False)

    async def get_website(self, url: str) -> str:
        """
        Scrape the provided website and return its content.
        :param url: The URL of the website.
        :return: The content of the website in JSON format.
        """
        # Status events are currently disabled; Open WebUI would normally inject
        # __event_emitter__ into the tool call.
        __event_emitter__ = None
        functions = HelpFunctions()
        emitter = EventEmitter(__event_emitter__)

        if not url.startswith(("http://", "https://")):
            # Assume an https:// prefix is needed.
            url = "https://" + url

        await emitter.emit(description=f"Fetching content from URL: {url}")

        results_json = []

        try:
            response_site = requests.get(url, headers=self.headers, timeout=120)
            response_site.raise_for_status()
            html_content = response_site.text

            await emitter.emit(description="Parsing website content")

            soup = BeautifulSoup(html_content, "html.parser")

            page_title = soup.title.string if soup.title else "No title found"
            page_title = unicodedata.normalize("NFKC", page_title.strip())
            page_title = functions.remove_emojis(page_title)
            title_site = page_title
            url_site = url
            content_site = functions.format_text(
                soup.get_text(separator=" ", strip=True)
            )

            truncated_content = functions.truncate_to_n_words(
                content_site, self.valves.PAGE_CONTENT_WORDS_LIMIT
            )

            result_site = {
                "title": title_site,
                "url": url_site,
                "content": truncated_content,
                "excerpt": functions.generate_excerpt(content_site),
            }

            results_json.append(result_site)

            if self.valves.CITATION_LINKS and __event_emitter__:
                await __event_emitter__(
                    {
                        "type": "citation",
                        "data": {
                            "document": [truncated_content],
                            "metadata": [{"source": url_site}],
                            "source": {"name": title_site},
                        },
                    }
                )

            await emitter.emit(
                status="complete",
                description="Website content retrieved and processed successfully",
                done=True,
            )

        except requests.exceptions.RequestException as e:
            results_json.append(
                {
                    "url": url,
                    "content": f"Failed to retrieve the page. Error: {str(e)}",
                }
            )

            await emitter.emit(
                status="error",
                description=f"Error fetching website content: {str(e)}",
                done=True,
            )

        return json.dumps(results_json, ensure_ascii=False)


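# Illustrative only: a minimal sketch of exercising the tool outside Open WebUI.
# The SearXNG URL is hypothetical; the instance must allow the "json" output
# format in its settings for the search call to succeed.
if __name__ == "__main__":
    import asyncio

    async def _demo_tools():
        tools = Tools()
        tools.valves.SEARXNG_ENGINE_API_BASE_URL = "http://localhost:8888/search"
        print(await tools.search_web("open webui searxng tool"))
        print(await tools.get_website("example.com"))

    asyncio.run(_demo_tools())
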
172
windowing_utils.py
Normal file
@ -0,0 +1,172 @@
# windowing_utils.py
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Awaitable, Callable, Dict, List, Optional, Tuple
import hashlib


# ---------- Token counting (swap in a real tokenizer if you want)
def approx_token_count(text: str) -> int:
    # ~4 chars ≈ 1 token (rough but stable rule of thumb)
    return max(1, len(text) // 4)


def count_message_tokens(messages: List[Dict], tok_len: Callable[[str], int]) -> int:
    total = 0
    for m in messages:
        total += tok_len(m.get("content", ""))
    return total


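# Illustrative only: the heuristic above can be swapped for a real tokenizer, as
# the comment suggests. A sketch using tiktoken (an assumed optional dependency);
# any callable str -> int can be passed as tok_len.
def tiktoken_count(text: str) -> int:
    try:
        import tiktoken  # assumption: installed separately (pip install tiktoken)

        enc = tiktoken.get_encoding("cl100k_base")
        return len(enc.encode(text))
    except Exception:
        # Fall back to the character heuristic when tiktoken is unavailable
        return approx_token_count(text)

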
# ---------- Thread ID + summary store
def derive_thread_id(body: Dict) -> str:
    for key in ("conversation_id", "thread_id", "chat_id", "session_id", "room_id"):
        if key in body and body[key]:
            return str(body[key])
    # No explicit ID: derive a stable fingerprint from the model and the first messages
    parts = [str(body.get("model", ""))]
    msgs = body.get("messages", [])[:2]
    for m in msgs:
        parts.append(m.get("role", ""))
        parts.append(m.get("content", "")[:256])
    raw = "||".join(parts)
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]


class RunningSummaryStore:
    def __init__(self):
        self._mem: dict[str, str] = {}

    def get(self, thread_id: str) -> str:
        return self._mem.get(thread_id, "")

    def update(self, thread_id: str, new_summary: str):
        self._mem[thread_id] = new_summary


SUMMARY_STORE = RunningSummaryStore()


# ---------- Sliding window + running summary
@dataclass
class ConversationWindow:
    max_ctx_tokens: int
    response_reserve: int = 2048
    tok_len: Callable[[str], int] = approx_token_count
    running_summary: str = ""
    summary_header: str = "Samenvatting tot nu toe"  # "Summary so far"
    history: List[Dict] = field(default_factory=list)

    def add(self, role: str, content: str):
        self.history.append({"role": role, "content": content})

    def _base_messages(self, system_prompt: Optional[str]) -> List[Dict]:
        msgs: List[Dict] = []
        if system_prompt:
            msgs.append({"role": "system", "content": system_prompt})
        if self.running_summary:
            msgs.append(
                {
                    "role": "system",
                    "content": f"{self.summary_header}:\n{self.running_summary}",
                }
            )
        return msgs

    async def build_within_budget(
        self,
        system_prompt: Optional[str],
        summarizer: Optional[Callable[[str, List[Dict]], Awaitable[str]]] = None,
    ) -> List[Dict]:
        budget = self.max_ctx_tokens - max(1, self.response_reserve)
        working = self.history[:]
        candidate = self._base_messages(system_prompt) + working
        if count_message_tokens(candidate, self.tok_len) <= budget:
            return candidate

        # 1) No summarizer: trim the oldest turns until the budget fits
        if summarizer is None:
            while working and count_message_tokens(
                self._base_messages(system_prompt) + working, self.tok_len
            ) > budget:
                working.pop(0)
            self.history = working
            return self._base_messages(system_prompt) + working

        # 2) Summarizer available: fold the oldest turns into the running summary
        #    in batches, so they are compressed rather than dropped
        working = self.history[:]
        chunk_buf: List[Dict] = []

        async def build_candidate(_summary: str, _working: List[Dict]) -> List[Dict]:
            base = []
            if system_prompt:
                base.append({"role": "system", "content": system_prompt})
            if _summary:
                base.append(
                    {
                        "role": "system",
                        "content": f"{self.summary_header}:\n{_summary}",
                    }
                )
            return base + _working

        while working and count_message_tokens(
            await build_candidate(self.running_summary, working), self.tok_len
        ) > budget:
            chunk_buf.append(working.pop(0))
            # Summarize once the buffer holds roughly 1500 tokens, or when history runs out
            if (
                count_message_tokens(
                    [{"role": "system", "content": str(chunk_buf)}], self.tok_len
                )
                > 1500
                or not working
            ):
                self.running_summary = await summarizer(self.running_summary, chunk_buf)
                chunk_buf = []

        # Flush any remaining buffered turns so no turns are silently lost
        if chunk_buf:
            self.running_summary = await summarizer(self.running_summary, chunk_buf)
            chunk_buf = []

        self.history = working
        return await build_candidate(self.running_summary, working)


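# Illustrative only: a toy end-to-end run of the window. The summarizer below is a
# stand-in (a real one would call an LLM to compress the folded turns), and the
# tiny budgets are chosen purely to force summarization.
async def _demo_window():
    async def toy_summarizer(prev: str, turns: List[Dict]) -> str:
        return f"{prev} [+{len(turns)} older turns]".strip()

    win = ConversationWindow(max_ctx_tokens=200, response_reserve=50)
    for i in range(40):
        win.add("user", f"message {i} with some filler words " * 3)
    msgs = await win.build_within_budget("You are helpful.", toy_summarizer)
    print(win.running_summary, "| kept:", len(msgs))
# To try it: import asyncio; asyncio.run(_demo_window())

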
# ---------- Repo chunking
def split_text_tokens(
    text: str,
    tok_len: Callable[[str], int],
    max_tokens: int,
    overlap_tokens: int = 60,
) -> List[str]:
    if tok_len(text) <= max_tokens:
        return [text]
    # Estimate a character step size from the desired token count
    approx_ratio = max_tokens / max(1, tok_len(text))
    step = max(1000, int(len(text) * approx_ratio))
    chunks: List[str] = []
    i = 0
    while i < len(text):
        ch = text[i : i + step]
        # Shrink the chunk until it fits the token budget
        while tok_len(ch) > max_tokens and len(ch) > 200:
            ch = ch[:-200]
        chunks.append(ch)
        if overlap_tokens > 0:
            ov_chars = max(100, overlap_tokens * 4)  # ~4 chars per token
            i += max(1, len(ch) - ov_chars)
        else:
            i += len(ch)
    return chunks


def fit_context_under_budget(
    items: List[Tuple[str, str]],
    tok_len: Callable[[str], int],
    budget_tokens: int,
) -> List[Tuple[str, str]]:
    # Greedily keep items in ranked order until the token budget is exhausted
    res: List[Tuple[str, str]] = []
    used = 0
    for title, text in items:
        t = tok_len(text)
        if used + t <= budget_tokens:
            res.append((title, text))
            used += t
        else:
            break
    return res


def build_repo_context(
    files_ranked: List[Tuple[str, str, float]],
    per_chunk_tokens: int = 2100,
    overlap_tokens: int = 60,
    ctx_budget_tokens: int = 5000,
    tok_len: Callable[[str], int] = approx_token_count,
) -> str:
    # Split each ranked file into overlapping chunks, then pack chunks under the budget
    expanded: List[Tuple[str, str]] = []
    for path, content, _ in files_ranked:
        for i, ch in enumerate(
            split_text_tokens(content, tok_len, per_chunk_tokens, overlap_tokens)
        ):
            expanded.append((f"{path}#chunk{i+1}", ch))
    selected = fit_context_under_budget(expanded, tok_len, ctx_budget_tokens)
    ctx = ""
    for title, ch in selected:
        ctx += f"\n\n=== {title} ===\n{ch}"
    return ctx.strip()


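# Illustrative only: a made-up ranked file list packed into a context string. With a
# ~1600-token first file and a 2000-token budget, only the first file's chunk fits.
if __name__ == "__main__":
    ranked = [
        ("src/app.py", "def main():\n    print('hello')\n" * 200, 0.92),
        ("README.md", "Example project documentation. " * 400, 0.61),
    ]
    ctx = build_repo_context(ranked, ctx_budget_tokens=2000)
    print(ctx[:300])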