From d119743bd6f31c9ab2eddd62a95b6ab22b2c82db Mon Sep 17 00:00:00 2001
From: Andy Bunce
Date: Thu, 23 Apr 2026 22:55:20 +0100
Subject: [PATCH] [fix] vulkan

Pin the host port to 8878 (quoted, per Compose convention for port
mappings) and replace the flow-style command list with a folded
multi-line command enabling GPU offload, KV-cache quantization and
explicit sampling defaults for the Vulkan llama.cpp server.

---
 llama.cpp/compose.yml | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/llama.cpp/compose.yml b/llama.cpp/compose.yml
index db0ba86..1cf3342 100644
--- a/llama.cpp/compose.yml
+++ b/llama.cpp/compose.yml
@@ -3,7 +3,7 @@ services:
     image: ghcr.io/ggml-org/llama.cpp:server-vulkan
     container_name: llama-server-vulkan
     ports:
-      - "${HOST_PORT:-8080}:8080"
+      - "8878:8080"
     volumes:
       - /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
       # Optionally mount a local .gguf file directly:
@@ -32,7 +32,21 @@ services:
       LLAMA_ARG_THREADS: "${THREADS:-8}"
 
     restart: unless-stopped
-    command: [
-      "--jinja", # Enable Jinja templates for chat formatting
-      "--flash-attn" # Enable flash attention if supported
-    ]
\ No newline at end of file
+    command: >
+      --n-gpu-layers all
+      --parallel 2
+      --flash-attn on
+      --ctx-size 10000
+      --no-mmap
+      --cache-type-k q8_0
+      --cache-type-v q8_0
+      --jinja
+      --reasoning-budget 0
+      --temp 0.8
+      --top-p 0.95
+      --top-k 20
+      --min-p 0.0
+      --presence-penalty 1.0
+      --repeat-penalty 1.0
+      --port 8080
+      --host 0.0.0.0