#!/usr/bin/env python3
import argparse
import os
import sys
import tomllib  # Requires Python 3.11+ (standard on macOS Sonoma/Sequoia)
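
# Expected layout (an illustrative sketch; the folder and file names below are
# examples, but the [server] keys mirror args_map in build_command):
#
#   gguf/
#       my-model/
#           config.toml
#           my-model.Q4_K_M.gguf
#
# Sample config.toml:
#
#   [server]
#   model_file = "my-model.Q4_K_M.gguf"
#   n_gpu_layers = 99
#   ctx_size = 8192
#   threads = 8
#   port = 8080
#   host = "127.0.0.1"
#   alias = "my-model"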

# --- CONFIGURATION ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
GGUF_DIR = os.path.join(BASE_DIR, "gguf")


def load_config(model_name):
    # 1. Locate the folder
    model_path = os.path.join(GGUF_DIR, model_name)
    config_file = os.path.join(model_path, "config.toml")

    if not os.path.exists(config_file):
        print(f"X Error: Config file not found at {config_file}")
        sys.exit(1)

    # 2. Parse TOML
    with open(config_file, "rb") as f:
        config = tomllib.load(f)

    return config, model_path
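
# For the sample layout above, load_config("my-model") would return the parsed
# TOML as a plain dict plus the model folder path, e.g.
# ({"server": {"model_file": "my-model.Q4_K_M.gguf", ...}}, ".../gguf/my-model").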


def build_command(config, model_path):
    server_conf = config.get("server", {})

    # 1. Find the .gguf file
    gguf_name = server_conf.get("model_file")
    if not gguf_name:
        print("X Error: 'model_file' missing in config.toml")
        sys.exit(1)

    full_model_path = os.path.join(model_path, gguf_name)

    # 2. Build the llama-server command
    cmd = ["llama-server"]

    # Map TOML keys to llama-server CLI flags
    args_map = {
        "model_file": "-m",  # path is added manually below; kept for reference
        "n_gpu_layers": "--n-gpu-layers",
        "ctx_size": "--ctx-size",
        "threads": "--threads",
        "port": "--port",
        "host": "--host",
        "alias": "--alias",
    }

    cmd.extend(["-m", full_model_path])

    for key, value in server_conf.items():
        if key == "model_file":
            continue  # Already added above

        flag = args_map.get(key)
        if flag:
            cmd.extend([flag, str(value)])

    return cmd
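
# With the sample config above, build_command yields something like the
# following (illustrative; the optional flags follow the TOML key order,
# since the parsed dict preserves insertion order):
#
#   llama-server -m .../gguf/my-model/my-model.Q4_K_M.gguf \
#       --n-gpu-layers 99 --ctx-size 8192 --threads 8 \
#       --port 8080 --host 127.0.0.1 --alias my-model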


def main():
    parser = argparse.ArgumentParser(description="Launch a local LLM from config.")
    parser.add_argument("model", help="The name of the folder inside gguf/")
    args = parser.parse_args()

    print(f">> Loading configuration for: {args.model}...")
    config, path = load_config(args.model)

    cmd = build_command(config, path)

    print(f"> Model dir: {path}")
    print(f"> Command: {' '.join(cmd)}")
    print("-" * 40)

    try:
        # Replace the Python process with llama-server: the server inherits
        # this PID, so no wrapper process stays resident
        os.execvp("llama-server", cmd)
    except FileNotFoundError:
        print("X Error: 'llama-server' not found in PATH.")
        print("  Run: brew install llama.cpp")
        sys.exit(1)  # exec failed, so exit with an error status


if __name__ == "__main__":
    main()
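
# Example session (the script filename here is hypothetical):
#   $ ./launch_model.py my-model
#   >> Loading configuration for: my-model...
#   > Model dir: .../gguf/my-model
#   > Command: llama-server -m .../gguf/my-model/my-model.Q4_K_M.gguf --port 8080 ...
#   ----------------------------------------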