init

commit 37adbcc96f
4 changed files with 219 additions and 0 deletions
models/gguf/qwen3-coder/config.toml · 15 lines · Normal file
@@ -0,0 +1,15 @@
[server]
# The specific GGUF file to run inside this folder
model_file = "Qwen3-Coder-Next-BF16-00001-of-00004.gguf"

# Hardware Settings
n_gpu_layers = 99
ctx_size = 131072
threads = 24

# Network Settings
port = 8082
host = "0.0.0.0"

# Optional: Set a friendly alias for tools like OpenCode
alias = "qwen3-coder"
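With these settings, the launcher script below (models/launch.py) should assemble and print a command along the following lines; the model path is abbreviated here, and the flag order simply follows the TOML key order:

    llama-server -m <...>/models/gguf/qwen3-coder/Qwen3-Coder-Next-BF16-00001-of-00004.gguf --n-gpu-layers 99 --ctx-size 131072 --threads 24 --port 8082 --host 0.0.0.0 --alias qwen3-coder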
models/launch.py · 91 lines · Executable file
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import tomllib  # Requires Python 3.11+ (Standard on macOS Sonoma/Sequoia)
import subprocess

# --- CONFIGURATION ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
GGUF_DIR = os.path.join(BASE_DIR, "gguf")


def load_config(model_name):
    # 1. Locate the folder
    model_path = os.path.join(GGUF_DIR, model_name)
    config_file = os.path.join(model_path, "config.toml")

    if not os.path.exists(config_file):
        print(f"X Error: Config file not found at {config_file}")
        sys.exit(1)

    # 2. Parse TOML
    with open(config_file, "rb") as f:
        config = tomllib.load(f)

    return config, model_path

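# Illustration: for the qwen3-coder config shown above, load_config("qwen3-coder")
# should return roughly
#   ({"server": {"model_file": "Qwen3-Coder-Next-BF16-00001-of-00004.gguf",
#                "n_gpu_layers": 99, "ctx_size": 131072, "threads": 24,
#                "port": 8082, "host": "0.0.0.0", "alias": "qwen3-coder"}},
#    "<BASE_DIR>/gguf/qwen3-coder")
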
def build_command(config, model_path):
    server_conf = config.get("server", {})

    # 1. Find the .gguf file
    gguf_name = server_conf.get("model_file")
    if not gguf_name:
        print("X Error: 'model_file' missing in config.toml")
        sys.exit(1)

    full_model_path = os.path.join(model_path, gguf_name)

    # 2. Build the llama-server command
    cmd = ["llama-server"]

    # Map TOML keys to CLI flags
    # Key = TOML key, Value = CLI flag
    args_map = {
        "model_file": "-m",  # The path is handled manually below; kept for reference
        "n_gpu_layers": "--n-gpu-layers",
        "ctx_size": "--ctx-size",
        "threads": "--threads",
        "port": "--port",
        "host": "--host",
        "alias": "--alias",
    }

    cmd.extend(["-m", full_model_path])

    for key, value in server_conf.items():
        if key == "model_file":
            continue  # Already added

        flag = args_map.get(key)
        if flag:
            cmd.extend([flag, str(value)])

    return cmd


def main():
    parser = argparse.ArgumentParser(description="Launch a local LLM from config.")
    parser.add_argument("model", help="The name of the folder inside gguf/")
    args = parser.parse_args()

    print(f">> Loading configuration for: {args.model}...")
    config, path = load_config(args.model)

    cmd = build_command(config, path)

    print(f"> Model folder: {path}")
    print(f"> Command: {' '.join(cmd)}")
    print("-" * 40)

    try:
        # Run and replace the Python process (saves memory/PID handling)
        os.execvp("llama-server", cmd)
    except FileNotFoundError:
        print("X Error: 'llama-server' not found in PATH.")
        print("  Run: brew install llama.cpp")
        sys.exit(1)


if __name__ == "__main__":
    main()
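Usage: with llama.cpp installed (brew install llama.cpp, as the error hint above suggests), the launcher is invoked with the name of a folder under models/gguf/, for example:

    ./models/launch.py qwen3-coder

Because os.execvp replaces the Python process with llama-server, no wrapper process is left running; stopping the launcher (Ctrl-C) stops the server itself.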