#!/bin/bash
# llama-server 后台启动脚本
# 使用方法:
#   启动: ./llama_server.sh start
#   停止: ./llama_server.sh stop
#   重启: ./llama_server.sh restart
#   状态: ./llama_server.sh status
#   日志: ./llama_server.sh logs
# Runtime file locations and the display name used in messages.
PID_FILE="/var/run/llama-server.pid"
LOG_FILE="/var/log/llama-server.log"
SERVICE_NAME="llama-server"

# ANSI color escape sequences; stored literally and interpreted by `echo -e`.
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
NC='\033[0m'
#######################################
# Launch llama-server in the background and record its PID.
# Globals:   PID_FILE, LOG_FILE, SERVICE_NAME, GREEN, RED, YELLOW, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout; the server's stdout/stderr are
#            appended to LOG_FILE
# Exits:     1 if the recorded PID is still alive, if the PID file cannot be
#            written, or if the process is gone 2 seconds after launch
#######################################
start() {
  local pid

  # Refuse to start a second instance while the recorded PID is still alive.
  if [ -f "$PID_FILE" ]; then
    pid=$(cat "$PID_FILE")
    if kill -0 "$pid" 2>/dev/null; then
      echo -e "${YELLOW}[WARN]${NC} $SERVICE_NAME 已在运行 (PID: $pid)"
      exit 1
    fi
  fi

  echo -e "${GREEN}[INFO]${NC} 启用 GPU 持久化模式..."
  # Best effort: the exit status is not checked, so a failure here does not
  # abort the launch.
  sudo nvidia-smi -pm 1

  echo -e "${GREEN}[INFO]${NC} 正在后台启动 ${SERVICE_NAME},日志: $LOG_FILE"
  # GGML_CUDA_DISABLE_GRAPHS=1 is placed in the server's environment via env.
  nohup env GGML_CUDA_DISABLE_GRAPHS=1 \
    ~/llama.cpp/build/bin/llama-server \
    -m /root/models/Qwen3.5-122B-A10B-GGUF/Qwen3.5-122B-A10B-UD-IQ3_XXS.gguf \
    --host 0.0.0.0 --port 8000 \
    -ngl 999 -c 131072 -t 14 \
    --flash-attn on -b 512 -ub 256 \
    --no-mmap --mlock \
    -ctk q4_0 -ctv q4_0 \
    --parallel 1 --temp 1.0 --top-k 20 --top-p 0.95 \
    --jinja \
    -ot "\.ffn_(up|down|gate)_exps\.=CPU" \
    --rope-scaling yarn --rope-scale 4 --yarn-orig-ctx 32768 \
    --override-kv qwen3moe.context_length=int:131072 \
    --reasoning on \
    -a "Qwen3.5-122B-A10B" \
    >> "$LOG_FILE" 2>&1 &
  pid=$!

  # Keep the PID in a local so the liveness check below does not depend on
  # the PID file having been written successfully.
  if ! echo "$pid" > "$PID_FILE"; then
    echo -e "${RED}[ERROR]${NC} 无法写入 PID 文件: $PID_FILE"
    # Without a PID file the stop/status commands cannot find the process,
    # so do not leave it running untracked.
    kill "$pid" 2>/dev/null
    exit 1
  fi

  # Give the process a moment to fail fast before reporting success.
  sleep 2
  if kill -0 "$pid" 2>/dev/null; then
    echo -e "${GREEN}[OK]${NC} 已启动 (PID: $pid),API: http://0.0.0.0:8000"
  else
    echo -e "${RED}[ERROR]${NC} 启动失败,请查看: $LOG_FILE"
    rm -f "$PID_FILE"
    exit 1
  fi
}
#######################################
# Stop the server recorded in PID_FILE: send TERM, wait up to 15 seconds,
# then send KILL if it is still alive, and remove the PID file.
# Globals:   PID_FILE, SERVICE_NAME, GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Exits:     1 when no PID file exists; otherwise returns normally
#######################################
stop() {
  local pid i

  if [ ! -f "$PID_FILE" ]; then
    echo -e "${YELLOW}[WARN]${NC} 未运行"
    exit 1
  fi

  pid=$(cat "$PID_FILE")

  # Stale PID file: the recorded process is gone, so only clean up.
  if ! kill -0 "$pid" 2>/dev/null; then
    echo -e "${YELLOW}[WARN]${NC} 进程不存在,清理 PID 文件"
    rm -f "$PID_FILE"
    return 0
  fi

  echo -e "${GREEN}[INFO]${NC} 停止 $SERVICE_NAME (PID: $pid)..."
  kill "$pid"

  # Poll once per second, for at most 15 seconds, until the process exits.
  for ((i = 0; i < 15; i++)); do
    kill -0 "$pid" 2>/dev/null || break
    sleep 1
  done

  # Still alive after the grace period: force-kill.
  if kill -0 "$pid" 2>/dev/null; then
    kill -9 "$pid"
  fi

  rm -f "$PID_FILE"
  echo -e "${GREEN}[OK]${NC} 已停止"
}
#######################################
# Report whether the server recorded in PID_FILE is alive. When it is, also
# print a ps summary of the process and one formatted line per GPU row that
# nvidia-smi returns.
# Globals:   PID_FILE, SERVICE_NAME, GREEN, RED, NC (read)
# Arguments: none
# Outputs:   status report on stdout
#######################################
status() {
  local pid

  if [ -f "$PID_FILE" ] && kill -0 "$(cat "$PID_FILE")" 2>/dev/null; then
    pid=$(cat "$PID_FILE")
    echo -e "${GREEN}[运行中]${NC} PID: $pid"
    ps -p "$pid" -o pid,user,%cpu,%mem,etime --no-headers
    # The query, the CSV format flag and the awk formatter must form one
    # pipeline; the continuations keep --format attached to nvidia-smi.
    nvidia-smi \
      --query-gpu=name,utilization.gpu,memory.used,memory.total,temperature.gpu \
      --format=csv,noheader,nounits \
      | awk -F',' '{printf "GPU:%s 使用率:%s%% 显存:%s/%sMiB 温度:%s°C\n",$1,$2,$3,$4,$5}'
  else
    echo -e "${RED}[未运行]${NC} $SERVICE_NAME"
    # Drop any stale PID file left behind by a dead process.
    rm -f "$PID_FILE"
  fi
}
#######################################
# Follow the server log with `tail -f`.
# Globals:   LOG_FILE (read)
# Arguments: none
# Outputs:   log contents on stdout; a notice when the log file is missing
# Exits:     1 when LOG_FILE does not exist
#######################################
logs() {
  if [ ! -f "$LOG_FILE" ]; then
    echo "日志不存在: $LOG_FILE"
    exit 1
  fi
  tail -f "$LOG_FILE"
}
# Dispatch on the first command-line argument.
case "$1" in
  start) start ;;
  stop) stop ;;
  restart)
    # stop exits the shell with status 1 when no PID file exists; run it in a
    # subshell so restarting a stopped service still proceeds to start.
    ( stop )
    sleep 1
    start
    ;;
  status) status ;;
  logs) logs ;;
  *) echo "用法: $0 {start|stop|restart|status|logs}" ;;
esac