From 363701e0ee5ea580028e80b22e729878f29b6c27 Mon Sep 17 00:00:00 2001
From: admin
Date: Thu, 27 Nov 2025 08:54:07 +0100
Subject: [PATCH] repo=- fix

Treat empty/sentinel repo arguments ("", "-", "none") as "no repo filter"
for hybrid retrieval, sanitize path hints extracted from the prompt, keep a
preconfigured tree hint in llm_plan_edits_for_file, and copy llm_client.py
into the image.

meili_catalog_search() and meili_search() are intentionally left unchanged:
both are plain (non-async) functions, so an `await meili_search_fn(...)`
inside them is a SyntaxError that prevents agent_repo.py from importing.
Switching them to the injected search function needs an async signature
(and the repo/branch passed in as arguments) in a separate change.

---
 Dockerfile    |  2 +-
 agent_repo.py | 96 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index bb7b046..3aae3d6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -78,7 +78,7 @@ COPY queue_helper.py .
 COPY agent_repo.py .
 COPY windowing_utils.py .
 COPY smart_rag.py .
-COPY llm_client .
+COPY llm_client.py .
 
 EXPOSE 8080
 
diff --git a/agent_repo.py b/agent_repo.py
index 14ea4ce..2c91b18 100644
--- a/agent_repo.py
+++ b/agent_repo.py
@@ -75,6 +75,14 @@ _TRANS_WRAPPERS = [
     r"trans\(\s*{q}(.+?){q}\s*\)".format(q=_Q),
 ]
 
+def _clean_repo_arg(x):
+    """Map empty/sentinel repo values ("", "-", "none") to None (no repo filter)."""
+    if x is None:
+        return None
+    s = str(x).strip().lower()
+    return None if s in ("", "-", "none") else x
+
+
 def _extract_repo_branch_from_text(txt: str) -> Tuple[Optional[str], str]:
     repo_url, branch = None, "main"
     m = re.search(r"\bRepo\s*:\s*(\S+)", txt, flags=re.I)
@@ -2201,12 +2209,12 @@ async def llm_plan_edits_for_file(user_goal: str, rel: str, focus_snippet: str)
     # Tree-hint standaard aan: korte mapoverzicht + samenvattingen van nabije files
     tree_block = globals().get("_LLM_EDIT_TREE_HINT", "")
     tree_hint = os.getenv("AGENT_TREE_PROMPT","1").lower() not in ("0","false")
-    tree_block = ""
     try:
         if tree_hint:
             # NB: eenvoudige, lokale context: alleen siblings + map info om tokens te sparen
             # (Vereist repo_root hier normaal gesproken; als niet beschikbaar, laat leeg)
-            tree_block = "\n(Tree-overzicht niet beschikbaar in deze context)\n"
+            if not tree_block:
+                tree_block = "\n(Tree-overzicht niet beschikbaar in deze context)\n"
     except Exception:
         pass
     USER = (
@@ -3749,6 +3757,45 @@ def _extract_explicit_paths_robust(text: str) -> list[str]:
             uniq.append(p); seen.add(p)
     return uniq
 
+def _sanitize_path_hints(hints: list[str], all_files: list[str]) -> list[str]:
+    """
+    Drop pseudo-paths such as 'tool.list' from extracted path hints.
+
+    A hint is kept only if it contains a '/' (bare tokens are rejected) and is
+    either a known repo file or ends in an allowed extension. Hints whose
+    basename stem is a generic word (tool, list, ...) are kept only when the
+    file really exists. A leading './' is removed as a prefix, so
+    dot-directories such as '.github/' stay intact.
+    """
+    if not hints:
+        return []
+    ALLOWED_SUFFIXES = (
+        ".blade.php",".php",".js",".ts",".json",".yml",".yaml",".py",".md",".env",
+        ".sql",".css",".vue",".jsx",".tsx"
+    )
+    BAD_BASENAMES = {"tool","tools","list","search","update","create","store","index"}
+    known = set(all_files)  # O(1) membership instead of scanning the list per hint
+    out, seen = [], set()
+    for h in hints:
+        if not h:
+            continue
+        h = h.strip().replace("\\","/")
+        # Remove the "./" prefix only; lstrip("./") would also eat the dot of ".github/".
+        while h.startswith("./"):
+            h = h[2:]
+        h = h.lstrip("/")
+        if "/" not in h:
+            continue
+        stem = os.path.basename(h).split(".",1)[0].lower()
+        if h not in known and not h.endswith(ALLOWED_SUFFIXES):
+            continue
+        if stem in BAD_BASENAMES and h not in known:
+            continue
+        if h not in seen:
+            seen.add(h)
+            out.append(h)
+    return out
+
 def _grep_repo_for_literal(root: Path, needle: str, limit: int = 12) -> list[str]:
     """
     Heel snelle, ruwe literal-zoeker over tekstbestanden in de repo.
@@ -4381,9 +4428,9 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
     picked: List[str] = []
 
     # 1) expliciete paden uit de prompt (bestaande extractor)
-    explicit = list(extract_explicit_paths(st.user_goal) or [])
+    explicit = _sanitize_path_hints(list(extract_explicit_paths(st.user_goal) or []), all_files)
     # 2) robuuste fallback extractor
-    robust = _extract_explicit_paths_robust(st.user_goal)
+    robust = _sanitize_path_hints(_extract_explicit_paths_robust(st.user_goal), all_files)
     for pth in explicit + [p for p in robust if p not in explicit]:
         norm = pth.replace("\\", "/").strip()
         if norm in all_files and norm not in picked:
@@ -4461,26 +4508,26 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
             except Exception:
                 return False
 
-    view_files = [f for f in all_files
-                  if f.startswith("resources/views/") and f.endswith(".blade.php")]
-    lang_files = [f for f in all_files
-                  if f.startswith("resources/lang/") and (f.endswith(".json") or f.endswith(".php"))]
+        view_files = [f for f in all_files
+                      if f.startswith("resources/views/") and f.endswith(".blade.php")]
+        lang_files = [f for f in all_files
+                      if f.startswith("resources/lang/") and (f.endswith(".json") or f.endswith(".php"))]
 
-    # Als we de oude literal kennen: eerst de files waar die echt in staat
-    if old_lit:
-        view_hits = [f for f in view_files if _contains_old(f)]
-        lang_hits = [f for f in lang_files if _contains_old(f)]
-    else:
-        view_hits = view_files
-        lang_hits = lang_files
+        # If the old literal is known: start with the files that actually contain it
+        if old_lit:
+            view_hits = [f for f in view_files if _contains_old(f)]
+            lang_hits = [f for f in lang_files if _contains_old(f)]
+        else:
+            view_hits = view_files
+            lang_hits = lang_files
 
-    # Zet de meest waarschijnlijke kandidaten vóóraan, behoud verder huidige volgorde
-    front = []
-    for lst in (view_hits, lang_hits):
-        for f in lst:
-            if f in all_files and f not in front:
-                front.append(f)
-    picked = list(dict.fromkeys(front + picked))[:MAX_FILES_DRYRUN]
+        # Put the most likely candidates first; otherwise keep the current order
+        front = []
+        for lst in (view_hits, lang_hits):
+            for f in lst:
+                if f in all_files and f not in front:
+                    front.append(f)
+        picked = list(dict.fromkeys(front + picked))[:MAX_FILES_DRYRUN]
 
 
     # --- (optioneel) priors op basis van framework (je eerdere patch A/B) ---
@@ -4500,16 +4547,17 @@ async def handle_repo_agent(messages: List[dict], request) -> str:
         logger.info("Smart RAG path select. 2) retrieval")
         merged = []
         for qv in variants:
+            use_collection = bool(st.collection_name)
             part = await hybrid_retrieve(
                 _rag_query_internal,
                 qv,
-                repo=st.owner_repo,  # <<< repo-scope
+                repo=_clean_repo_arg(st.owner_repo) if not use_collection else None,
                 profile=None,
                 path_contains=(file_hints[0] if file_hints else None),
                 per_query_k=int(os.getenv("RAG_PER_QUERY_K","30")),
                 n_results=int(os.getenv("RAG_N_RESULTS","18")),
                 alpha=float(os.getenv("RAG_EMB_WEIGHT","0.6")),
-                collection_name=st.collection_name  # <<< repo-collection
+                collection_name=(st.collection_name if use_collection else None)
             )
             merged.extend(part)
 