Files
cmii-uav-watchdog-project/cmii-uav-watchdog-agent/cmd/watchdog-agent.go
2025-12-06 11:26:05 +08:00

168 lines
4.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"cmii-uav-watchdog-agent/host_info"
"cmii-uav-watchdog-agent/rpc"
"cmii-uav-watchdog-common/models"
"cmii-uav-watchdog-common/totp_tier_two"
"cmii-uav-watchdog-common/wdd_log"
"os"
"syscall"
"time"
)
// Package-level settings and state shared by the heartbeat loop.
var (
	// maxRetryCount is the threshold the failure counter is compared
	// against before the heartbeat loop gives up.
	maxRetryCount int = 12

	// defaultHeartbeatInterval is how long the loop waits after a
	// successful heartbeat. It may be overridden from the environment
	// when DebugMode is enabled.
	defaultHeartbeatInterval = 120 * time.Minute

	// failWaitInterval is how long the loop waits after a failed or
	// unauthorized heartbeat. It may be overridden from the environment
	// when DebugMode is enabled.
	failWaitInterval = time.Hour

	// tierTwoTotpSecret holds the most recent tier-two TOTP secret taken
	// from a heartbeat response; it stays empty until one is received.
	tierTwoTotpSecret string
)
// StartHeartbeatDetection runs the heartbeat loop until a value is received
// from signalChan or the attempt counter reaches maxRetryCount.
//
// When DebugMode is set, the wait intervals can be overridden through the
// environment variables WATCHDOG_AGENT_HEARTBEAT_INTERVAL (wait after a
// successful heartbeat) and WATCHDOG_AGENT_FAIL_WAIT_INTERVAL (wait after a
// failed one), both in time.ParseDuration syntax. A value that fails to parse
// is logged and ignored, leaving the previous interval in place.
//
// The heartbeat endpoint is read from WATCHDOG_HEARTBEAT_URL and passed to
// rpc.NewClient.
//
// The attempt counter starts at 1, is incremented on every failed or
// unauthorized heartbeat, and is reset to 1 after a successful one. When it
// reaches maxRetryCount the function logs a fatal message, sends
// syscall.SIGTERM on signalChan and returns.
func StartHeartbeatDetection(signalChan chan os.Signal) {
	wdd_log.Info("启动心跳检测任务...")

	// In debug mode the intervals may be overridden from the environment.
	if DebugMode {
		if raw := os.Getenv("WATCHDOG_AGENT_HEARTBEAT_INTERVAL"); raw != "" {
			// Parse into a temporary: time.ParseDuration returns 0 on
			// error, and storing that would make the loop below sleep for
			// zero time and spin.
			if parsed, err := time.ParseDuration(raw); err != nil {
				wdd_log.Error("无法解析环境变量: %v", err)
			} else {
				defaultHeartbeatInterval = parsed
				wdd_log.Info("已更新心跳检测间隔 => %s", defaultHeartbeatInterval)
			}
		}

		if raw := os.Getenv("WATCHDOG_AGENT_FAIL_WAIT_INTERVAL"); raw != "" {
			// Same rule as above: only a successfully parsed value
			// replaces the current interval.
			if parsed, err := time.ParseDuration(raw); err != nil {
				wdd_log.Error("无法解析环境变量: %v", err)
			} else {
				failWaitInterval = parsed
				wdd_log.Info("已更新心跳检测失败等待间隔 => %s", failWaitInterval)
			}
		}
	}

	// Create the RPC client.
	heartbeatURL := os.Getenv("WATCHDOG_HEARTBEAT_URL")
	client := rpc.NewClient(nil, heartbeatURL)
	wdd_log.Info("心跳检测URL: %s", client.GetHeartbeatURL())

	// Attempt counter; 1 means "first attempt since the last success".
	failCount := 1

	// Heartbeat loop.
	// NOTE(review): signalChan is only polled between sleeps, so a value
	// sent on it is noticed at most one wait interval later.
	for {
		select {
		case <-signalChan:
			wdd_log.Info("收到终止信号,停止心跳检测")
			return
		default:
			// Try to send one heartbeat.
			authorized, err := sendHeartbeat(client)
			if err != nil {
				wdd_log.Error("第 %d 次心跳检测失败: %v", failCount, err)
				failCount++
			} else if !authorized {
				wdd_log.Warn("第 %d 次心跳检测未获得授权", failCount)
				failCount++
			} else {
				// Log the attempt number before resetting it; resetting
				// first would make this message always report 1.
				wdd_log.Info("第 %d 次心跳检测成功,已获得授权", failCount)
				failCount = 1
			}

			// Give up once the counter reaches the configured maximum.
			if failCount >= maxRetryCount {
				// NOTE(review): if wdd_log.Fatal terminates the process,
				// the send below never runs — confirm.
				wdd_log.Fatal("心跳检测连续失败 %d 次,发送终止信号!!", failCount)
				signalChan <- syscall.SIGTERM
				return
			}

			// Wait before the next attempt.
			if err != nil || !authorized {
				// Failed or unauthorized: use the failure interval.
				time.Sleep(failWaitInterval)
			} else {
				// Success: use the regular heartbeat interval.
				time.Sleep(defaultHeartbeatInterval)
			}
		}
	}
}
// sendHeartbeat builds one heartbeat request and sends it through client.
//
// The request carries the host information returned by host_info.GetAllInfo,
// the result of GetEnvInfo, the current Unix timestamp and, once a tier-two
// TOTP secret has been stored in tierTwoTotpSecret, a code generated from
// that secret.
//
// It returns the Authorized flag of the response. It returns (false, err)
// when sending fails, and (false, nil) when the response is nil or when the
// response carries a TOTP code that does not verify against the stored
// secret. A non-empty SecondTOTPSecret in the response replaces the stored
// secret before that verification takes place.
func sendHeartbeat(client *rpc.Client) (bool, error) {
	// 1. Collect host information.
	hostInfoData := host_info.GetAllInfo()
	hostInfo := models.HostInfo{
		SystemInfo: hostInfoData.SystemInfo,
		CPUInfo:    hostInfoData.CPUInfo,
		DiskInfo:   hostInfoData.DiskInfo,
		MemoryInfo: hostInfoData.MemoryInfo,
		NetInfo:    hostInfoData.NetInfo,
	}

	// 2. Build the heartbeat request, including the environment information.
	request := &models.HeartbeatRequest{
		HostInfo:  hostInfo,
		Timestamp: time.Now().Unix(),
		EnvInfo:   GetEnvInfo(),
	}

	// 3. Attach a TOTP code when a secret is already known. A generation
	// failure is logged and the request is sent without a code.
	if tierTwoTotpSecret != "" {
		totpCode, err := totp_tier_two.GenerateTierTwoTOTPCode(tierTwoTotpSecret)
		if err != nil {
			wdd_log.Error("生成TOTP验证码失败: %v", err)
		} else {
			request.TOTPCode = totpCode
		}
	}

	// 4. Send the heartbeat request.
	response, err := client.SendHeartbeatWithRetry(request, 10*time.Second)
	if err != nil {
		return false, err
	}
	if response == nil {
		// err is nil on this path, so say so explicitly: a missing response
		// is reported to the caller as "not authorized".
		return false, nil
	}

	// 5. Store a rotated TOTP secret. The secret itself is deliberately not
	// written to the log.
	if response.SecondTOTPSecret != "" {
		tierTwoTotpSecret = response.SecondTOTPSecret
		wdd_log.Info("已更新TOTP密钥")
	}

	// 6. Verify the TOTP code of the response when both a code and a secret
	// are available.
	// NOTE(review): a response without a TOTP code skips this check and its
	// Authorized flag is returned as-is — confirm this is intended.
	if response.TOTPCode != "" && tierTwoTotpSecret != "" {
		if !totp_tier_two.VerifyTierTwoTOTPCode(response.TOTPCode, tierTwoTotpSecret) {
			wdd_log.Warn("TOTP验证码验证失败")
			return false, nil
		}
	}

	return response.Authorized, nil
}