8 lines
2.0 KiB
Bash
8 lines
2.0 KiB
Bash
|
|
# Start the "mistral-llm0" llama-server container detached (-d).
#   Docker options : GPU device 0 only, restart policy "unless-stopped",
#                    host dir /opt/models/mistral/ mounted at /models,
#                    host port 8000 published to container port 8080.
#   Server options : everything after the image name is handed to the
#                    container as its arguments (model file, listen address,
#                    GPU offload, context size, chat template).
docker run \
  --name "mistral-llm0" \
  --gpus device=0 \
  -e CUDA_VISIBLE_DEVICES=0 \
  -d \
  --restart unless-stopped \
  -v /opt/models/mistral/:/models \
  -p 8000:8080 \
  llama-server-noavx:latest \
  -m /models/Ministral-3-14B-Instruct-2512-Q4_K_M.gguf \
  --host 0.0.0.0 \
  --port 8080 \
  --n-gpu-layers -1 \
  --flash-attn on \
  --split-mode layer \
  -c 42000 \
  --jinja \
  --chat-template-file /models/chat_templateX.jinja
|
||
|
|
#docker run --name "mistral-llm0" --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 -d --restart unless-stopped -v /opt/models/mistral/:/models -p 8000:8080 llama-server-noavx:latest -m /models/Magistral-Small-2509-Q4_K_M.gguf --host 0.0.0.0 --port 8080 --n-gpu-layers -1 --flash-attn on --split-mode layer -c 10000 --jinja
|
||
|
|
#cur docker run --name "mistral-llm0" --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 -d --restart unless-stopped -v /opt/models/mistral/:/models -p 8000:8080 llama-server-noavx:latest -m /models/Ministral-3-14B-Reasoning-2512-Q4_K_M.gguf --host 0.0.0.0 --port 8080 --n-gpu-layers -1 --mmproj /models/Ministral/mmproj-F16-Ministral-Reasoning-2512.gguf --flash-attn on --split-mode layer --temp 0.05 -c 42000 --frequency_penalty 0.8 --repeat_penalty 1.2 --jinja
|
||
|
|
#docker run --name "mistral-llm0" --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 --rm -v /opt/models/mistral/:/models -p 8000:8080 llama-server-noavx:latest -m /models/Ministral-3-14B-Instruct-2512-Q4_K_M.gguf --host 0.0.0.0 --port 8080 --n-gpu-layers -1 --mmproj /models/Ministral/mmproj-F16-Ministral-Instruct-2512.gguf --flash-attn on --split-mode layer -c 42000 --jinja
|
||
|
|
#docker run --name "mistral-llm0" --gpus device=0 -e CUDA_VISIBLE_DEVICES=0 --rm -v /opt/models/mistral/:/models -p 8000:8080 llama-server-noavx:latest -m /models/Magistral-Small-2509-Q4_K_M.gguf --host 0.0.0.0 --port 8080 --n-gpu-layers -1 --mmproj /models/mmproj-F16.gguf --cache-type-k q8_0 --cache-type-v q8_0 --flash-attn on --split-mode layer -c 13288 --jinja
|
||
|
|
#/opt/models/mistral/Magistral-Small-2509-Q4_K_M.gguf
|
||
|
|
#/opt/models/mistral/devstral-small-2-chat-template-opencode.jinja
|