#!/bin/bash
#
# llama-server background launch script.
#
# Usage:
#   start:   ./llama_server.sh start
#   stop:    ./llama_server.sh stop
#   restart: ./llama_server.sh restart
#   status:  ./llama_server.sh status
#   logs:    ./llama_server.sh logs
#
# Exit status: 0 on success, 1 when the requested action fails (already
# running, not running, launch failure, missing log), 2 for an unknown or
# missing sub-command.

set -u

PID_FILE="/var/run/llama-server.pid"
LOG_FILE="/var/log/llama-server.log"
SERVICE_NAME="llama-server"

# ANSI colour sequences, stored as real escape characters so they can be
# printed with a plain '%s'.
GREEN=$'\033[0;32m'
RED=$'\033[0;31m'
YELLOW=$'\033[1;33m'
NC=$'\033[0m'

#######################################
# Print "<colour>[TAG]<reset> message" on stdout.
# Arguments: $1 - colour sequence, $2 - tag text, $3... - message
#######################################
log() {
  local color=$1 tag=$2
  shift 2
  printf '%s[%s]%s %s\n' "$color" "$tag" "$NC" "$*"
}

#######################################
# Print the PID recorded in PID_FILE if it names a live process.
# Globals: PID_FILE (read)
# Outputs: the PID on stdout
# Returns: 0 if the recorded process is alive; 1 if the file is missing,
#          unreadable, does not hold a positive integer, or the process
#          is gone.
#######################################
running_pid() {
  local pid
  [[ -f "$PID_FILE" ]] || return 1
  pid=$(cat -- "$PID_FILE" 2>/dev/null) || return 1
  # Reject anything but a positive integer: "kill 0" or a negative value
  # would address a whole process group instead of a single process.
  [[ "$pid" =~ ^[1-9][0-9]*$ ]] || return 1
  kill -0 "$pid" 2>/dev/null || return 1
  printf '%s\n' "$pid"
}

#######################################
# Launch llama-server in the background and record its PID.
# Globals: PID_FILE (written), LOG_FILE (appended), SERVICE_NAME (read)
# Returns: 0 if the server is still alive 2 seconds after launch;
#          1 if it is already running, the PID file cannot be written,
#          or the process died during start-up.
#######################################
start() {
  local pid server_bin server_pid
  local -a server_args

  if pid=$(running_pid); then
    log "$YELLOW" WARN "$SERVICE_NAME 已在运行 (PID: $pid)"
    return 1
  fi

  log "$GREEN" INFO "启用 GPU 持久化模式..."
  # Best effort: a failure here is reported by nvidia-smi itself and does
  # not prevent the launch.
  sudo nvidia-smi -pm 1

  log "$GREEN" INFO "正在后台启动 $SERVICE_NAME,日志: $LOG_FILE"

  server_bin=~/llama.cpp/build/bin/llama-server
  # Launch arguments, kept exactly as originally configured.
  server_args=(
    -m /root/models/Qwen3.5-122B-A10B-GGUF/Qwen3.5-122B-A10B-UD-IQ3_XXS.gguf
    --host 0.0.0.0 --port 8000
    -ngl 999 -c 131072 -t 14
    --flash-attn on -b 512 -ub 256
    --no-mmap --mlock
    -ctk q4_0 -ctv q4_0
    --parallel 1 --temp 1.0 --top-k 20 --top-p 0.95
    --jinja
    -ot '\.ffn_(up|down|gate)_exps\.=CPU'
    --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768
    --override-kv qwen3moe.context_length=int:131072
    --reasoning on
    -a "Qwen3.5-122B-A10B"
  )

  nohup env GGML_CUDA_DISABLE_GRAPHS=1 "$server_bin" "${server_args[@]}" \
    >> "$LOG_FILE" 2>&1 &
  server_pid=$!

  # Without a PID file the server could not be stopped through this
  # script, so do not leave it running unmanaged.
  if ! printf '%s\n' "$server_pid" > "$PID_FILE"; then
    log "$RED" ERROR "无法写入 PID 文件: $PID_FILE"
    kill "$server_pid" 2>/dev/null
    return 1
  fi

  # Give the process a moment to fail fast (bad path, bad arguments, ...).
  sleep 2
  if kill -0 "$server_pid" 2>/dev/null; then
    log "$GREEN" OK "已启动 (PID: $server_pid),API: http://0.0.0.0:8000"
  else
    log "$RED" ERROR "启动失败,请查看: $LOG_FILE"
    rm -f -- "$PID_FILE"
    return 1
  fi
}

#######################################
# Stop the server recorded in PID_FILE: SIGTERM first, SIGKILL if it is
# still alive after 15 seconds.
# Globals: PID_FILE (read, removed), SERVICE_NAME (read)
# Returns: 1 if there is no PID file; 0 otherwise (including the case
#          where only a stale or invalid PID file was cleaned up).
#######################################
stop() {
  local pid i

  if [[ ! -f "$PID_FILE" ]]; then
    log "$YELLOW" WARN "未运行"
    return 1
  fi

  if pid=$(running_pid); then
    log "$GREEN" INFO "停止 $SERVICE_NAME (PID: $pid)..."
    kill "$pid"
    for ((i = 0; i < 15; i++)); do
      kill -0 "$pid" 2>/dev/null || break
      sleep 1
    done
    # Still alive after the grace period: force it.
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid"
    fi
    rm -f -- "$PID_FILE"
    log "$GREEN" OK "已停止"
  else
    log "$YELLOW" WARN "进程不存在,清理 PID 文件"
    rm -f -- "$PID_FILE"
  fi
}

#######################################
# Report whether the server is running; when it is, also print its process
# statistics and the GPU figures returned by nvidia-smi.
# Globals: PID_FILE (read; removed when the server is not running),
#          SERVICE_NAME (read)
#######################################
status() {
  local pid

  if pid=$(running_pid); then
    log "$GREEN" "运行中" "PID: $pid"
    ps -p "$pid" -o pid,user,%cpu,%mem,etime --no-headers
    nvidia-smi \
      --query-gpu=name,utilization.gpu,memory.used,memory.total,temperature.gpu \
      --format=csv,noheader,nounits \
      | awk -F',' '{printf "GPU:%s 使用率:%s%% 显存:%s/%sMiB 温度:%s°C\n",$1,$2,$3,$4,$5}'
  else
    log "$RED" "未运行" "$SERVICE_NAME"
    rm -f -- "$PID_FILE"
  fi
}

#######################################
# Follow the server log until interrupted.
# Globals: LOG_FILE (read)
# Returns: 1 if the log file does not exist.
#######################################
logs() {
  if [[ ! -f "$LOG_FILE" ]]; then
    printf '%s\n' "日志不存在: $LOG_FILE"
    return 1
  fi
  tail -f -- "$LOG_FILE"
}

#######################################
# Dispatch the sub-command given as $1.
# Returns: the status of the selected action; 2 for an unknown or missing
#          sub-command.
#######################################
main() {
  case "${1:-}" in
    start) start ;;
    stop) stop ;;
    restart)
      # stop reports "not running" with a non-zero status; restart should
      # still bring the service up in that case.
      stop
      sleep 1
      start
      ;;
    status) status ;;
    logs) logs ;;
    *)
      printf '%s\n' "用法: $0 {start|stop|restart|status|logs}"
      return 2
      ;;
  esac
}

# Must stay the last statement: its return status is the script's exit code.
main "$@"