[fix] vulkan

This commit is contained in:
Andy Bunce 2026-04-23 22:55:20 +01:00
parent b273987872
commit d119743bd6

View file

@@ -3,7 +3,7 @@ services:
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
container_name: llama-server-vulkan
ports:
- "${HOST_PORT:-8080}:8080"
- "8878:8080"
volumes:
- /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
# Optionally mount a local .gguf file directly:
@@ -32,7 +32,21 @@ services:
LLAMA_ARG_THREADS: "${THREADS:-8}"
restart: unless-stopped
command: [
"--jinja", # Enable Jinja templates for chat formatting
"--flash-attn" # Enable flash attention if supported
]
command: >
--n-gpu-layers all
--parallel 2
--flash-attn on
--ctx-size 10000
--no-mmap
--cache-type-k q8_0
--cache-type-v q8_0
--jinja
--reasoning-budget 0
--temp 0.8
--top-p 0.95
--top-k 20
--min-p 0.0
--presence-penalty 1.0
--repeat-penalty 1.0
--port 8080
--host 0.0.0.0