Andy Bunce 2026-02-24 21:14:02 +00:00
commit 37adbcc96f
4 changed files with 219 additions and 0 deletions


@@ -0,0 +1,15 @@
[server]
# The specific GGUF file to run inside this folder
model_file = "Qwen3-Coder-Next-BF16-00001-of-00004.gguf"
# Hardware Settings
n_gpu_layers = 99
ctx_size = 131072
threads = 24
# Network Settings
port = 8082
host = "0.0.0.0"
# Optional: Set a friendly alias for tools like OpenCode
alias = "qwen3-coder"

models/launch.py Executable file

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import tomllib # Requires Python 3.11+ (Standard on macOS Sonoma/Sequoia)
import subprocess
# --- CONFIGURATION ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
GGUF_DIR = os.path.join(BASE_DIR, "gguf")

def load_config(model_name):
    # 1. Locate the folder
    model_path = os.path.join(GGUF_DIR, model_name)
    config_file = os.path.join(model_path, "config.toml")

    if not os.path.exists(config_file):
        print(f"X Error: Config file not found at {config_file}")
        sys.exit(1)

    # 2. Parse TOML
    with open(config_file, "rb") as f:
        config = tomllib.load(f)

    return config, model_path

def build_command(config, model_path):
    server_conf = config.get("server", {})

    # 1. Find the .gguf file
    gguf_name = server_conf.get("model_file")
    if not gguf_name:
        print("X Error: 'model_file' missing in config.toml")
        sys.exit(1)

    full_model_path = os.path.join(model_path, gguf_name)

    # 2. Build the llama-server command
    cmd = ["llama-server"]

    # Map TOML keys to CLI flags
    # Key = TOML key, Value = CLI flag
    args_map = {
        "model_file": "-m",  # We handle the path manually, but good for ref
        "n_gpu_layers": "--n-gpu-layers",
        "ctx_size": "--ctx-size",
        "threads": "--threads",
        "port": "--port",
        "host": "--host",
        "alias": "--alias",
    }

    cmd.extend(["-m", full_model_path])

    for key, value in server_conf.items():
        if key == "model_file":
            continue  # Already added
        flag = args_map.get(key)
        if flag:
            cmd.extend([flag, str(value)])

    return cmd

def main():
    parser = argparse.ArgumentParser(description="Launch a local LLM from config.")
    parser.add_argument("model", help="The name of the folder inside gguf/")
    args = parser.parse_args()

    print(f">> Loading configuration for: {args.model}...")
    config, path = load_config(args.model)
    cmd = build_command(config, path)

    print(f"> Context: {path}")
    print(f"> Command: {' '.join(cmd)}")
    print("-" * 40)

    try:
        # Run and replace the Python process (saves memory/PID handling)
        os.execvp("llama-server", cmd)
    except FileNotFoundError:
        print("X Error: 'llama-server' not found in PATH.")
        print("  Run: brew install llama.cpp")
        sys.exit(1)


if __name__ == "__main__":
    main()
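
Usage is then ./launch.py <folder>, where <folder> is a directory under models/gguf/ containing a config.toml like the one above. As a sketch of the config-to-command flow (assuming a hypothetical folder named qwen3-coder and that launch.py is importable from the models/ directory), the command can be previewed without exec'ing anything:

# Hypothetical dry run from the models/ directory; the folder name "qwen3-coder" is assumed.
from launch import load_config, build_command

config, model_path = load_config("qwen3-coder")
print(" ".join(build_command(config, model_path)))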