Files
ProjectAGiPrompt/32-WDD-AI服务器/1-GPU服务器/10-运行脚本.md
2026-06-15 14:14:24 +08:00

3.1 KiB
Raw Blame History

#!/bin/bash

# llama-server 后台启动脚本
#
# 使用方法:
#   启动: ./llama_server.sh start
#   停止: ./llama_server.sh stop
#   重启: ./llama_server.sh restart
#   状态: ./llama_server.sh status
#   日志: ./llama_server.sh logs

# Locations used by every sub-command, plus the display name of the service.
PID_FILE="/var/run/llama-server.pid"
LOG_FILE="/var/log/llama-server.log"
SERVICE_NAME="llama-server"

# ANSI colour escapes; stored as literal backslash sequences and interpreted
# later by `echo -e`. NC resets the colour.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'

# start: launch llama-server as a background process and record its PID.
#
# Globals read: PID_FILE, LOG_FILE, SERVICE_NAME, GREEN, RED, YELLOW, NC
# Side effects: runs `sudo nvidia-smi -pm 1`, appends the server's
#               stdout/stderr to LOG_FILE, writes the child PID to PID_FILE.
# Exits the script with status 1 when the recorded PID is already alive, or
# when the new process is no longer alive 2 seconds after launch.
start() {
  # Refuse to start a second instance while the recorded PID still responds.
  if [ -f "$PID_FILE" ] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
    echo -e "${YELLOW}[WARN]${NC} $SERVICE_NAME 已在运行 (PID: $(cat "$PID_FILE"))"
    exit 1
  fi

  echo -e "${GREEN}[INFO]${NC} 启用 GPU 持久化模式..."
  sudo nvidia-smi -pm 1
  # Braces stop the variable name from absorbing the characters that follow.
  echo -e "${GREEN}[INFO]${NC} 正在后台启动 ${SERVICE_NAME}日志: $LOG_FILE"

  nohup env GGML_CUDA_DISABLE_GRAPHS=1 \
    ~/llama.cpp/build/bin/llama-server \
    -m /root/models/Qwen3.5-122B-A10B-GGUF/Qwen3.5-122B-A10B-UD-IQ3_XXS.gguf \
    --host 0.0.0.0 --port 8000 \
    -ngl 999 -c 131072 -t 14 \
    --flash-attn on -b 512 -ub 256 \
    --no-mmap --mlock \
    -ctk q4_0 -ctv q4_0 \
    --parallel 1 --temp 1.0 --top-k 20 --top-p 0.95 \
    --jinja \
    -ot "\.ffn_(up|down|gate)_exps\.=CPU" \
    --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768 \
    --override-kv qwen3moe.context_length=int:131072 \
    --reasoning on \
    -a "Qwen3.5-122B-A10B" \
    >> "$LOG_FILE" 2>&1 &

  # $! is the PID of the job just put in the background.
  echo $! > "$PID_FILE"
  sleep 2

  # Only catches a process that dies within the first 2 seconds.
  if kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
    echo -e "${GREEN}[OK]${NC} 已启动 (PID: $(cat "$PID_FILE"))API: http://0.0.0.0:8000"
  else
    echo -e "${RED}[ERROR]${NC} 启动失败,请查看: $LOG_FILE"
    rm -f "$PID_FILE"
    exit 1
  fi
}

# stop: terminate the process recorded in PID_FILE and remove the PID file.
#
# Globals read: PID_FILE, SERVICE_NAME, GREEN, YELLOW, NC
# Returns: 1 when PID_FILE does not exist; otherwise the status of the final
#          command (normally 0).
# Uses `return` rather than `exit` so that a caller such as `restart` can
# carry on to `start` when nothing was running.
stop() {
  local pid i

  if [ ! -f "$PID_FILE" ]; then
    echo -e "${YELLOW}[WARN]${NC} 未运行"
    return 1
  fi

  pid=$(cat "$PID_FILE")
  if kill -0 "$pid" 2>/dev/null; then
    echo -e "${GREEN}[INFO]${NC} 停止 $SERVICE_NAME (PID: $pid)..."
    kill "$pid"
    # Allow up to 15 seconds for the process to exit after SIGTERM.
    for ((i = 0; i < 15; i++)); do
      kill -0 "$pid" 2>/dev/null || break
      sleep 1
    done
    # Still alive after the grace period: force-kill.
    if kill -0 "$pid" 2>/dev/null; then
      kill -9 "$pid"
    fi
    rm -f "$PID_FILE"
    echo -e "${GREEN}[OK]${NC} 已停止"
  else
    # The PID file points at a process that no longer exists.
    echo -e "${YELLOW}[WARN]${NC} 进程不存在,清理 PID 文件"
    rm -f "$PID_FILE"
  fi
}

# status: report whether the process recorded in PID_FILE is alive.
#
# Globals read: PID_FILE, SERVICE_NAME, GREEN, RED, NC
# When alive: prints the PID, one `ps` line for the process, and one line
# per row of `nvidia-smi` CSV output.
# When not alive: prints a "not running" line and removes PID_FILE.
status() {
  local pid

  if [ -f "$PID_FILE" ] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
    pid=$(cat "$PID_FILE")
    echo -e "${GREEN}[运行中]${NC} PID: $pid"
    ps -p "$pid" -o pid,user,%cpu,%mem,etime --no-headers
    # The query and --format arguments belong to one nvidia-smi invocation;
    # awk reformats each comma-separated row into a single summary line.
    nvidia-smi --query-gpu=name,utilization.gpu,memory.used,memory.total,temperature.gpu \
      --format=csv,noheader,nounits |
      awk -F',' '{printf "GPU:%s 使用率:%s%% 显存:%s/%sMiB 温度:%s°C\n",$1,$2,$3,$4,$5}'
  else
    echo -e "${RED}[未运行]${NC} $SERVICE_NAME"
    rm -f "$PID_FILE"
  fi
}

# logs: follow the server log with `tail -f`.
#
# Globals read: LOG_FILE
# Exits the script with status 1 when LOG_FILE does not exist.
logs() {
  if [ ! -f "$LOG_FILE" ]; then
    echo "日志不存在: $LOG_FILE"
    exit 1
  fi
  tail -f "$LOG_FILE"
}

# Command dispatcher: route the first CLI argument to its handler.
# Anything unrecognised, including no argument at all, prints the usage line.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  restart)
    stop
    sleep 1
    start
    ;;
  status)
    status
    ;;
  logs)
    logs
    ;;
  *)
    echo "用法: $0 {start|stop|restart|status|logs}"
    ;;
esac