[fix] vulkan

This commit is contained in:
Andy Bunce 2026-04-23 22:55:20 +01:00
parent b273987872
commit d119743bd6

View file

@@ -3,7 +3,7 @@ services:
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
container_name: llama-server-vulkan
ports:
- "${HOST_PORT:-8080}:8080"
- "8878:8080"
volumes:
- /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
# Optionally mount a local .gguf file directly:
@@ -32,7 +32,21 @@ services:
LLAMA_ARG_THREADS: "${THREADS:-8}"
restart: unless-stopped
command: [
"--jinja", # Enable Jinja templates for chat formatting
"--flash-attn" # Enable flash attention if supported
]
command: >
--n-gpu-layers all
--parallel 2
--flash-attn on
--ctx-size 10000
--no-mmap
--cache-type-k q8_0
--cache-type-v q8_0
--jinja
--reasoning-budget 0
--temp 0.8
--top-p 0.95
--top-k 20
--min-p 0.0
--presence-penalty 1.0
--repeat-penalty 1.0
--port 8080
--host 0.0.0.0