[fix] vulkan
This commit is contained in:
parent
b273987872
commit
d119743bd6
1 changed file with 19 additions and 5 deletions
|
|
@ -3,7 +3,7 @@ services:
|
|||
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
|
||||
container_name: llama-server-vulkan
|
||||
ports:
|
||||
- "${HOST_PORT:-8080}:8080"
|
||||
- 8878:8080
|
||||
volumes:
|
||||
- /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
|
||||
# Optionally mount a local .gguf file directly:
|
||||
|
|
@ -32,7 +32,21 @@ services:
|
|||
LLAMA_ARG_THREADS: "${THREADS:-8}"
|
||||
|
||||
restart: unless-stopped
|
||||
command: [
|
||||
"--jinja", # Enable Jinja templates for chat formatting
|
||||
"--flash-attn" # Enable flash attention if supported
|
||||
]
|
||||
command: >
|
||||
--n-gpu-layers all
|
||||
--parallel 2
|
||||
--flash-attn on
|
||||
--ctx-size 10000
|
||||
--no-mmap
|
||||
--cache-type-k q8_0
|
||||
--cache-type-v q8_0
|
||||
--jinja
|
||||
--reasoning-budget 0
|
||||
--temp 0.8
|
||||
--top-p 0.95
|
||||
--top-k 20
|
||||
--min-p 0.0
|
||||
--presence-penalty 1.0
|
||||
--repeat-penalty 1.0
|
||||
--port 8080
|
||||
--host 0.0.0.0
|
||||
Loading…
Add table
Add a link
Reference in a new issue