integrated sentence transformer model
This commit is contained in:
parent
c7c7162311
commit
ddc1c56cd7
10
Dockerfile
10
Dockerfile
@ -53,7 +53,15 @@ mapping = {
|
|||||||
choice = os.environ.get("RAG_EMBEDDINGS","gte-multilingual").lower()
|
choice = os.environ.get("RAG_EMBEDDINGS","gte-multilingual").lower()
|
||||||
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
|
hf_id = mapping.get(choice, "BAAI/bge-small-en-v1.5")
|
||||||
# cache_folder respecteert SENTENCE_TRANSFORMERS_HOME/HF_HOME, maar we forceren expliciet:
|
# cache_folder respecteert SENTENCE_TRANSFORMERS_HOME/HF_HOME, maar we forceren expliciet:
|
||||||
SentenceTransformer(hf_id, cache_folder=os.environ.get("SENTENCE_TRANSFORMERS_HOME","/opt/sentence-transformers"))
|
cache_root = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
|
||||||
|
local_dir = os.path.join(cache_root, "embedder")
|
||||||
|
os.makedirs(cache_root, exist_ok = True)
|
||||||
|
|
||||||
|
print("Downloading SentenceTransformer:", hf_id)
|
||||||
|
# Laat SentenceTransformer zelf hun cache doen (HF_HOME etc)
|
||||||
|
# wij saven het eindresultaat daarna naar local_dir
|
||||||
|
model = SentenceTransformer(hf_id, cache_folder=os.environ.get("SENTENCE_TRANSFORMERS_HOME","/opt/sentence-transformers"))
|
||||||
|
model.save(local_dir)
|
||||||
print("Prefetched SentenceTransformer:", hf_id)
|
print("Prefetched SentenceTransformer:", hf_id)
|
||||||
PY
|
PY
|
||||||
|
|
||||||
|
|||||||
35
app.py
35
app.py
@ -190,10 +190,21 @@ def _extract_tool_calls_from_text(txt: str):
|
|||||||
raw = obj["call_tool"]
|
raw = obj["call_tool"]
|
||||||
if isinstance(raw, dict):
|
if isinstance(raw, dict):
|
||||||
raw = [raw]
|
raw = [raw]
|
||||||
|
if isinstance(raw, str):
|
||||||
|
print("Invalid toolcall:",str(raw))
|
||||||
|
raw=[]
|
||||||
tc = []
|
tc = []
|
||||||
for it in (raw or []):
|
for it in (raw or []):
|
||||||
name = (it or {}).get("name")
|
try:
|
||||||
args = (it or {}).get("arguments") or {}
|
name = (it or {}).get("name")
|
||||||
|
except Exception as e:
|
||||||
|
print("Error:",str(e),"raw=",str(raw),"it=",str(it))
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
args = (it or {}).get("arguments") or {}
|
||||||
|
except Exception as e:
|
||||||
|
print("Error:",str(e),"raw=",str(raw),"it=",str(it))
|
||||||
|
continue
|
||||||
if isinstance(args, str):
|
if isinstance(args, str):
|
||||||
try: args = json.loads(args)
|
try: args = json.loads(args)
|
||||||
except Exception: pass
|
except Exception: pass
|
||||||
@ -540,11 +551,23 @@ def _build_embedder() -> _Embedder:
|
|||||||
model_name, family, slug = mapping["bge-small"]
|
model_name, family, slug = mapping["bge-small"]
|
||||||
else:
|
else:
|
||||||
model_name, family, slug = mapping[choice]
|
model_name, family, slug = mapping[choice]
|
||||||
|
cache_dir = os.environ.get("SENTENCE_TRANSFORMERS_HOME", "/opt/sentence-transformers")
|
||||||
|
local_dir = os.path.join(cache_dir, "embedder")
|
||||||
st_kwargs = {"device": "cpu"}
|
st_kwargs = {"device": "cpu"}
|
||||||
|
if os.path.isdir(local_dir):
|
||||||
|
# Prefetched model in image → gebruik dat
|
||||||
|
model_source = local_dir
|
||||||
|
print(f"Loading SentenceTransformer from local dir: {local_dir}")
|
||||||
|
else:
|
||||||
|
# Geen lokale dir gevonden → terugvallen op HF-ID + cache_folder
|
||||||
|
model_source = model_name
|
||||||
|
st_kwargs["cache_folder"] = cache_dir
|
||||||
|
print(f"Local dir {local_dir} not found, falling back to HF model: {model_name}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if "trust_remote_code" in inspect.signature(SentenceTransformer).parameters:
|
if "trust_remote_code" in inspect.signature(SentenceTransformer).parameters:
|
||||||
st_kwargs["trust_remote_code"] = True
|
st_kwargs["trust_remote_code"] = True
|
||||||
model = SentenceTransformer(model_name, **st_kwargs)
|
model = SentenceTransformer(model_source, **st_kwargs)
|
||||||
# optioneel: CPU thread-telling forceren
|
# optioneel: CPU thread-telling forceren
|
||||||
try:
|
try:
|
||||||
thr = int(os.getenv("RAG_TORCH_THREADS", "0"))
|
thr = int(os.getenv("RAG_TORCH_THREADS", "0"))
|
||||||
@ -554,8 +577,8 @@ def _build_embedder() -> _Embedder:
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
return _Embedder(slug=slug, family=family, model=model, device="cpu")
|
return _Embedder(slug=slug, family=family, model=model, device="cpu")
|
||||||
except Exception:
|
except Exception as e:
|
||||||
pass
|
print("ERROR building embedder:",str(e))
|
||||||
|
|
||||||
# Fallback via Chroma embedding functions
|
# Fallback via Chroma embedding functions
|
||||||
from chromadb.utils import embedding_functions as ef
|
from chromadb.utils import embedding_functions as ef
|
||||||
@ -2630,6 +2653,8 @@ async def openai_chat_completions(body: dict = Body(...), request: Request = Non
|
|||||||
})
|
})
|
||||||
|
|
||||||
call = (obj.get("call_tool") or {})
|
call = (obj.get("call_tool") or {})
|
||||||
|
if isinstance(call, str):
|
||||||
|
call={"name": "Invalid_toolcall", "arguments": {}}
|
||||||
tname = call.get("name")
|
tname = call.get("name")
|
||||||
if tname not in TOOLS_REGISTRY:
|
if tname not in TOOLS_REGISTRY:
|
||||||
return JSONResponse(_openai_chat_response(model, f"Onbekende tool: {tname}", messages))
|
return JSONResponse(_openai_chat_response(model, f"Onbekende tool: {tname}", messages))
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user