#!/usr/bin/env python3
import argparse
import os
import sys
import tomllib  # Requires Python 3.11+ (standard on macOS Sonoma/Sequoia)
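
# Expected layout (an illustrative sketch; the folder and file names below are
# examples, but the [server] keys mirror args_map in build_command):
#
#   gguf/
#       my-model/
#           config.toml
#           my-model.Q4_K_M.gguf
#
# Sample config.toml:
#
#   [server]
#   model_file = "my-model.Q4_K_M.gguf"
#   n_gpu_layers = 99
#   ctx_size = 8192
#   threads = 8
#   port = 8080
#   host = "127.0.0.1"
#   alias = "my-model"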

# --- CONFIGURATION ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
GGUF_DIR = os.path.join(BASE_DIR, "gguf")


def load_config(model_name):
    # 1. Locate the folder
    model_path = os.path.join(GGUF_DIR, model_name)
    config_file = os.path.join(model_path, "config.toml")

    if not os.path.exists(config_file):
        print(f"X Error: Config file not found at {config_file}")
        sys.exit(1)

    # 2. Parse TOML
    with open(config_file, "rb") as f:
        config = tomllib.load(f)

    return config, model_path
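
# For the sample layout above, load_config("my-model") would return the parsed
# TOML as a plain dict plus the model folder path, e.g.
# ({"server": {"model_file": "my-model.Q4_K_M.gguf", ...}}, ".../gguf/my-model").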


def build_command(config, model_path):
    server_conf = config.get("server", {})

    # 1. Find the .gguf file
    gguf_name = server_conf.get("model_file")
    if not gguf_name:
        print("X Error: 'model_file' missing in config.toml")
        sys.exit(1)

    full_model_path = os.path.join(model_path, gguf_name)

    # 2. Build the llama-server command
    cmd = ["llama-server"]

    # Map TOML keys to llama-server CLI flags
    args_map = {
        "model_file": "-m",  # path is added manually below; kept for reference
        "n_gpu_layers": "--n-gpu-layers",
        "ctx_size": "--ctx-size",
        "threads": "--threads",
        "port": "--port",
        "host": "--host",
        "alias": "--alias",
    }

    cmd.extend(["-m", full_model_path])

    for key, value in server_conf.items():
        if key == "model_file":
            continue  # Already added above

        flag = args_map.get(key)
        if flag:
            cmd.extend([flag, str(value)])

    return cmd
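
# With the sample config above, build_command yields something like the
# following (illustrative; the optional flags follow the TOML key order,
# since the parsed dict preserves insertion order):
#
#   llama-server -m .../gguf/my-model/my-model.Q4_K_M.gguf \
#       --n-gpu-layers 99 --ctx-size 8192 --threads 8 \
#       --port 8080 --host 127.0.0.1 --alias my-model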


def main():
    parser = argparse.ArgumentParser(description="Launch a local LLM from config.")
    parser.add_argument("model", help="The name of the folder inside gguf/")
    args = parser.parse_args()

    print(f">> Loading configuration for: {args.model}...")
    config, path = load_config(args.model)

    cmd = build_command(config, path)

    print(f"> Model dir: {path}")
    print(f"> Command: {' '.join(cmd)}")
    print("-" * 40)

    try:
        # Replace the Python process with llama-server: the server inherits
        # this PID, so no wrapper process stays resident
        os.execvp("llama-server", cmd)
    except FileNotFoundError:
        print("X Error: 'llama-server' not found in PATH.")
        print("  Run: brew install llama.cpp")
        sys.exit(1)  # exec failed, so exit with an error status


if __name__ == "__main__":
    main()
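
# Example session (the script filename here is hypothetical):
#   $ ./launch_model.py my-model
#   >> Loading configuration for: my-model...
#   > Model dir: .../gguf/my-model
#   > Command: llama-server -m .../gguf/my-model/my-model.Q4_K_M.gguf --port 8080 ...
#   ----------------------------------------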