From d119743bd6f31c9ab2eddd62a95b6ab22b2c82db Mon Sep 17 00:00:00 2001
From: Andy Bunce
Date: Thu, 23 Apr 2026 22:55:20 +0100
Subject: [PATCH] [fix] vulkan

Pin the host port to 8878 (quoted, per Compose convention for port
mappings) and replace the flow-style command list with a folded
multi-line command enabling GPU offload, KV-cache quantization and
explicit sampling defaults for the Vulkan llama.cpp server.

---
 llama.cpp/compose.yml | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/llama.cpp/compose.yml b/llama.cpp/compose.yml
index db0ba86..1cf3342 100644
--- a/llama.cpp/compose.yml
+++ b/llama.cpp/compose.yml
@@ -3,7 +3,7 @@ services:
     image: ghcr.io/ggml-org/llama.cpp:server-vulkan
     container_name: llama-server-vulkan
     ports:
-      - "${HOST_PORT:-8080}:8080"
+      - "8878:8080"
     volumes:
       - /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
       # Optionally mount a local .gguf file directly:
@@ -32,7 +32,21 @@ services:
       LLAMA_ARG_THREADS: "${THREADS:-8}"
 
     restart: unless-stopped
-    command: [
-      "--jinja", # Enable Jinja templates for chat formatting
-      "--flash-attn" # Enable flash attention if supported
-    ]
\ No newline at end of file
+    command: >
+      --n-gpu-layers all
+      --parallel 2
+      --flash-attn on
+      --ctx-size 10000
+      --no-mmap
+      --cache-type-k q8_0
+      --cache-type-v q8_0
+      --jinja
+      --reasoning-budget 0
+      --temp 0.8
+      --top-p 0.95
+      --top-k 20
+      --min-p 0.0
+      --presence-penalty 1.0
+      --repeat-penalty 1.0
+      --port 8080
+      --host 0.0.0.0