[fix] vulkan
This commit is contained in:
parent
b273987872
commit
d119743bd6
1 changed file with 19 additions and 5 deletions
|
|
@@ -3,7 +3,7 @@ services:
|
||||||
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
|
image: ghcr.io/ggml-org/llama.cpp:server-vulkan
|
||||||
container_name: llama-server-vulkan
|
container_name: llama-server-vulkan
|
||||||
ports:
|
ports:
|
||||||
- "${HOST_PORT:-8080}:8080"
|
- 8878:8080
|
||||||
volumes:
|
volumes:
|
||||||
- /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
|
- /home/andy/.lmstudio/models/lmstudio-community:/root/.cache/llama.cpp/
|
||||||
# Optionally mount a local .gguf file directly:
|
# Optionally mount a local .gguf file directly:
|
||||||
|
|
@@ -32,7 +32,21 @@ services:
|
||||||
LLAMA_ARG_THREADS: "${THREADS:-8}"
|
LLAMA_ARG_THREADS: "${THREADS:-8}"
|
||||||
|
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
command: [
|
command: >
|
||||||
"--jinja", # Enable Jinja templates for chat formatting
|
--n-gpu-layers all
|
||||||
"--flash-attn" # Enable flash attention if supported
|
--parallel 2
|
||||||
]
|
--flash-attn on
|
||||||
|
--ctx-size 10000
|
||||||
|
--no-mmap
|
||||||
|
--cache-type-k q8_0
|
||||||
|
--cache-type-v q8_0
|
||||||
|
--jinja
|
||||||
|
--reasoning-budget 0
|
||||||
|
--temp 0.8
|
||||||
|
--top-p 0.95
|
||||||
|
--top-k 20
|
||||||
|
--min-p 0.0
|
||||||
|
--presence-penalty 1.0
|
||||||
|
--repeat-penalty 1.0
|
||||||
|
--port 8080
|
||||||
|
--host 0.0.0.0
|
||||||
Loading…
Add table
Add a link
Reference in a new issue