init

commit 37adbcc96f
4 changed files with 219 additions and 0 deletions
models/gguf/qwen3-coder/config.toml · 15 lines · Normal file
@@ -0,0 +1,15 @@
[server]
# The specific GGUF file to run inside this folder
model_file = "Qwen3-Coder-Next-BF16-00001-of-00004.gguf"

# Hardware Settings
n_gpu_layers = 99
ctx_size = 131072
threads = 24

# Network Settings
port = 8082
host = "0.0.0.0"

# Optional: Set a friendly alias for tools like OpenCode
alias = "qwen3-coder"
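With these settings, the launcher script below (models/launch.py) should assemble and print a command along the following lines; the model path is abbreviated here, and the flag order simply follows the TOML key order:

    llama-server -m <...>/models/gguf/qwen3-coder/Qwen3-Coder-Next-BF16-00001-of-00004.gguf --n-gpu-layers 99 --ctx-size 131072 --threads 24 --port 8082 --host 0.0.0.0 --alias qwen3-coder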
models/launch.py · 91 lines · Executable file
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import tomllib  # Requires Python 3.11+ (Standard on macOS Sonoma/Sequoia)
import subprocess

# --- CONFIGURATION ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
GGUF_DIR = os.path.join(BASE_DIR, "gguf")


def load_config(model_name):
    # 1. Locate the folder
    model_path = os.path.join(GGUF_DIR, model_name)
    config_file = os.path.join(model_path, "config.toml")

    if not os.path.exists(config_file):
        print(f"X Error: Config file not found at {config_file}")
        sys.exit(1)

    # 2. Parse TOML
    with open(config_file, "rb") as f:
        config = tomllib.load(f)

    return config, model_path

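# Illustration: for the qwen3-coder config shown above, load_config("qwen3-coder")
# should return roughly
#   ({"server": {"model_file": "Qwen3-Coder-Next-BF16-00001-of-00004.gguf",
#                "n_gpu_layers": 99, "ctx_size": 131072, "threads": 24,
#                "port": 8082, "host": "0.0.0.0", "alias": "qwen3-coder"}},
#    "<BASE_DIR>/gguf/qwen3-coder")
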
def build_command(config, model_path):
    server_conf = config.get("server", {})

    # 1. Find the .gguf file
    gguf_name = server_conf.get("model_file")
    if not gguf_name:
        print("X Error: 'model_file' missing in config.toml")
        sys.exit(1)

    full_model_path = os.path.join(model_path, gguf_name)

    # 2. Build the llama-server command
    cmd = ["llama-server"]

    # Map TOML keys to CLI flags
    # Key = TOML key, Value = CLI flag
    args_map = {
        "model_file": "-m",  # The path is handled manually below; kept for reference
        "n_gpu_layers": "--n-gpu-layers",
        "ctx_size": "--ctx-size",
        "threads": "--threads",
        "port": "--port",
        "host": "--host",
        "alias": "--alias",
    }

    cmd.extend(["-m", full_model_path])

    for key, value in server_conf.items():
        if key == "model_file":
            continue  # Already added

        flag = args_map.get(key)
        if flag:
            cmd.extend([flag, str(value)])

    return cmd


def main():
    parser = argparse.ArgumentParser(description="Launch a local LLM from config.")
    parser.add_argument("model", help="The name of the folder inside gguf/")
    args = parser.parse_args()

    print(f">> Loading configuration for: {args.model}...")
    config, path = load_config(args.model)

    cmd = build_command(config, path)

    print(f"> Model folder: {path}")
    print(f"> Command: {' '.join(cmd)}")
    print("-" * 40)

    try:
        # Run and replace the Python process (saves memory/PID handling)
        os.execvp("llama-server", cmd)
    except FileNotFoundError:
        print("X Error: 'llama-server' not found in PATH.")
        print("  Run: brew install llama.cpp")
        sys.exit(1)


if __name__ == "__main__":
    main()
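Usage: with llama.cpp installed (brew install llama.cpp, as the error hint above suggests), the launcher is invoked with the name of a folder under models/gguf/, for example:

    ./models/launch.py qwen3-coder

Because os.execvp replaces the Python process with llama-server, no wrapper process is left running; stopping the launcher (Ctrl-C) stops the server itself.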