repo=- fix

This commit is contained in:
admin 2025-11-27 08:54:07 +01:00
parent a79293abbd
commit 363701e0ee
2 changed files with 78 additions and 28 deletions

View File

@ -78,7 +78,7 @@ COPY queue_helper.py .
COPY agent_repo.py .
COPY windowing_utils.py .
COPY smart_rag.py .
COPY llm_client .
COPY llm_client.py .
EXPOSE 8080

View File

@ -75,6 +75,14 @@ _TRANS_WRAPPERS = [
r"trans\(\s*{q}(.+?){q}\s*\)".format(q=_Q),
]
def _clean_repo_arg(x):
"""Zet lege/sentinel repo-waarden om naar None (geen filter)."""
if x is None:
return None
s = str(x).strip().lower()
return None if s in ("", "-", "none") else x
def _extract_repo_branch_from_text(txt: str) -> Tuple[Optional[str], str]:
repo_url, branch = None, "main"
m = re.search(r"\bRepo\s*:\s*(\S+)", txt, flags=re.I)
@ -1019,7 +1027,13 @@ def meili_catalog_search(q: str, limit: int = 10) -> List[dict]:
idx = meili_get_index(REPO_CATALOG_MEILI_INDEX)
if not idx: return []
try:
res = idx.search(q, {"limit": limit})
#res = idx.search(q, {"limit": limit})
# Gebruik ALTIJD de injectie:
res = await meili_search_fn(
q,
limit=limit,
filter={"repo_full": st.owner_repo, "branch": st.branch_base}
)
return res.get("hits", [])
except Exception as e:
logger.warning("WARN:agent_repo:meili_catalog_search: %s", e)
@ -1548,8 +1562,14 @@ def meili_search(owner_repo: Optional[str], branch: str, q: str, limit: int = 10
cli = get_meili()
if not cli: return []
try:
idx = cli.index(meili_index_name(owner_repo, branch))
res = idx.search(q, {"limit": limit})
#idx = cli.index(meili_index_name(owner_repo, branch))
#res = idx.search(q, {"limit": limit})
# Gebruik ALTIJD de injectie:
res = await meili_search_fn(
q,
limit=limit,
filter={"repo_full": st.owner_repo, "branch": st.branch_base}
)
return res.get("hits", [])
except Exception as e:
logger.warning("WARN:agent_repo:meili_search failed: %s", e)
@ -2201,12 +2221,12 @@ async def llm_plan_edits_for_file(user_goal: str, rel: str, focus_snippet: str)
# Tree-hint standaard aan: korte mapoverzicht + samenvattingen van nabije files
tree_block = globals().get("_LLM_EDIT_TREE_HINT", "")
tree_hint = os.getenv("AGENT_TREE_PROMPT","1").lower() not in ("0","false")
tree_block = ""
try:
if tree_hint:
# NB: eenvoudige, lokale context: alleen siblings + map info om tokens te sparen
# (Vereist repo_root hier normaal gesproken; als niet beschikbaar, laat leeg)
tree_block = "\n(Tree-overzicht niet beschikbaar in deze context)\n"
if not tree_block:
tree_block = "\n(Tree-overzicht niet beschikbaar in deze context)\n"
except Exception:
pass
USER = (
@ -3749,6 +3769,35 @@ def _extract_explicit_paths_robust(text: str) -> list[str]:
uniq.append(p); seen.add(p)
return uniq
def _sanitize_path_hints(hints: list[str], all_files: list[str]) -> list[str]:
"""
Filter pseudo-paden zoals 'tool.list' weg. Sta alleen echte projectpaden of
bekende extensies toe en vereis een '/' om pure tokens te weren.
"""
if not hints:
return []
ALLOWED_SUFFIXES = (
".blade.php",".php",".js",".ts",".json",".yml",".yaml",".py",".md",".env",
".sql",".css",".vue",".jsx",".tsx"
)
BAD_BASENAMES = {"tool","tools","list","search","update","create","store","index"}
out, seen = [], set()
for h in hints:
if not h:
continue
h = h.strip().lstrip("./").replace("\\","/")
if "/" not in h:
continue
base = os.path.basename(h)
stem = base.split(".",1)[0].lower()
if h not in all_files and not any(h.endswith(suf) for suf in ALLOWED_SUFFIXES):
continue
if stem in BAD_BASENAMES and h not in all_files:
continue
if h not in seen:
seen.add(h); out.append(h)
return out
def _grep_repo_for_literal(root: Path, needle: str, limit: int = 12) -> list[str]:
"""
Heel snelle, ruwe literal-zoeker over tekstbestanden in de repo.
@ -4381,9 +4430,9 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
picked: List[str] = []
# 1) expliciete paden uit de prompt (bestaande extractor)
explicit = list(extract_explicit_paths(st.user_goal) or [])
explicit = _sanitize_path_hints(list(extract_explicit_paths(st.user_goal) or []), all_files)
# 2) robuuste fallback extractor
robust = _extract_explicit_paths_robust(st.user_goal)
robust = _sanitize_path_hints(_extract_explicit_paths_robust(st.user_goal), all_files)
for pth in explicit + [p for p in robust if p not in explicit]:
norm = pth.replace("\\", "/").strip()
if norm in all_files and norm not in picked:
@ -4461,26 +4510,26 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
except Exception:
return False
view_files = [f for f in all_files
if f.startswith("resources/views/") and f.endswith(".blade.php")]
lang_files = [f for f in all_files
if f.startswith("resources/lang/") and (f.endswith(".json") or f.endswith(".php"))]
view_files = [f for f in all_files
if f.startswith("resources/views/") and f.endswith(".blade.php")]
lang_files = [f for f in all_files
if f.startswith("resources/lang/") and (f.endswith(".json") or f.endswith(".php"))]
# Als we de oude literal kennen: eerst de files waar die echt in staat
if old_lit:
view_hits = [f for f in view_files if _contains_old(f)]
lang_hits = [f for f in lang_files if _contains_old(f)]
else:
view_hits = view_files
lang_hits = lang_files
# Als we de oude literal kennen: eerst de files waar die echt in staat
if old_lit:
view_hits = [f for f in view_files if _contains_old(f)]
lang_hits = [f for f in lang_files if _contains_old(f)]
else:
view_hits = view_files
lang_hits = lang_files
# Zet de meest waarschijnlijke kandidaten vóóraan, behoud verder huidige volgorde
front = []
for lst in (view_hits, lang_hits):
for f in lst:
if f in all_files and f not in front:
front.append(f)
picked = list(dict.fromkeys(front + picked))[:MAX_FILES_DRYRUN]
# Zet de meest waarschijnlijke kandidaten vóóraan, behoud verder huidige volgorde
front = []
for lst in (view_hits, lang_hits):
for f in lst:
if f in all_files and f not in front:
front.append(f)
picked = list(dict.fromkeys(front + picked))[:MAX_FILES_DRYRUN]
# --- (optioneel) priors op basis van framework (je eerdere patch A/B) ---
@ -4500,16 +4549,17 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
logger.info("Smart RAG path select. 2) retrieval")
merged = []
for qv in variants:
use_collection = bool(st.collection_name)
part = await hybrid_retrieve(
_rag_query_internal,
qv,
repo=st.owner_repo, # <<< repo-scope
repo=_clean_repo_arg(st.owner_repo) if not use_collection else None,
profile=None,
path_contains=(file_hints[0] if file_hints else None),
per_query_k=int(os.getenv("RAG_PER_QUERY_K","30")),
n_results=int(os.getenv("RAG_N_RESULTS","18")),
alpha=float(os.getenv("RAG_EMB_WEIGHT","0.6")),
collection_name=st.collection_name # <<< repo-collection
collection_name=(st.collection_name if use_collection else None)
)
merged.extend(part)