版本封存

This commit is contained in:
zeaslity
2025-12-06 11:26:05 +08:00
parent 13949e1ba8
commit c0ae5e30c4
57 changed files with 2443 additions and 1428 deletions

View File

@@ -0,0 +1,47 @@
package main
import (
"os"
"sync"
"cmii-uav-watchdog-common/models"
)
// PodEnv holds the environment information of the current pod.
// It starts as the zero value and is filled in by init via GetEnvInfo.
var PodEnv models.EnvInfo

// init populates PodEnv once when the package is loaded.
func init() {
	PodEnv = GetEnvInfo()
}
// envInfoOnce guards the one-time population of PodEnv performed by GetEnvInfo.
var envInfoOnce sync.Once

// GetEnvInfo returns the environment information of the current pod.
//
// The values are read from the process environment exactly once, on the first
// call; the result is stored in PodEnv and every later call returns that cached
// value. The guard is a package-level sync.Once: a Once declared inside the
// function would be re-created on each call and would never prevent a second
// initialisation.
func GetEnvInfo() models.EnvInfo {
	envInfoOnce.Do(func() {
		PodEnv = models.EnvInfo{
			K8S_NAMESPACE:    os.Getenv("K8S_NAMESPACE"),
			APPLICATION_NAME: os.Getenv("APPLICATION_NAME"),
			CUST_JAVA_OPTS:   os.Getenv("CUST_JAVA_OPTS"),
			BIZ_CONFIG_GROUP: os.Getenv("BIZ_CONFIG_GROUP"),
			SYS_CONFIG_GROUP: os.Getenv("SYS_CONFIG_GROUP"),
			IMAGE_NAME:       os.Getenv("IMAGE_NAME"),
			JAVA_VERSION:     os.Getenv("JAVA_VERSION"),
			GIT_COMMIT:       os.Getenv("GIT_COMMIT"),
			GIT_BRANCH:       os.Getenv("GIT_BRANCH"),
			NODE_NAME:        os.Getenv("NODE_NAME"),
			NODE_IP:          os.Getenv("NODE_IP"),
			POD_NAME:         os.Getenv("POD_NAME"),
			LIMIT_CPU:        os.Getenv("LIMIT_CPU"),
			LIMIT_MEMORY:     os.Getenv("LIMIT_MEMORY"),
			REQUEST_CPU:      os.Getenv("REQUEST_CPU"),
			REQUEST_MEMORY:   os.Getenv("REQUEST_MEMORY"),
		}
	})
	return PodEnv
}

View File

@@ -1 +1,70 @@
package cmd
package main
// import (
// "cmii-uav-watchdog-agent/host_info"
// "cmii-uav-watchdog-common/wdd_log"
// "net/http"
// "os"
// "os/signal"
// "syscall"
// "github.com/gin-gonic/gin"
// )
// func StartHostInfoGin() {
// // 创建一个默认的 Gin 路由
// var r = gin.Default() // 定义一个 GET 路由
// r.GET("/ping", func(c *gin.Context) {
// c.JSON(http.StatusOK, gin.H{"message": "pong"})
// })
// // 定义一个 POST 路由
// r.POST("/echo", func(c *gin.Context) {
// var json map[string]interface{}
// if err := c.ShouldBindJSON(&json); err == nil {
// c.JSON(http.StatusOK, json)
// } else {
// c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
// }
// })
// r.GET("/cpu", func(c *gin.Context) {
// cpuInfo := host_info.GetCPUInfo() // 直接返回 CPU 信息
// c.JSON(http.StatusOK, cpuInfo)
// })
// r.GET("/memory", func(c *gin.Context) {
// memInfo := host_info.GetMemoryInfo() // 直接返回内存信息
// c.JSON(http.StatusOK, memInfo)
// })
// r.GET("/disk", func(c *gin.Context) {
// diskInfo := host_info.GetDiskInfo() // 直接返回磁盘信息
// c.JSON(http.StatusOK, diskInfo)
// })
// r.GET("/motherboard", func(c *gin.Context) {
// mbInfo := host_info.GetMotherboardInfo() // 直接返回主板信息
// c.JSON(http.StatusOK, mbInfo)
// })
// r.GET("/network", func(c *gin.Context) {
// networkInterfaces := host_info.GetNetworkInterfaces()
// c.JSON(http.StatusOK, networkInterfaces)
// })
// r.GET("/all", func(c *gin.Context) {
// allInfo := host_info.GetAllInfo()
// c.JSON(http.StatusOK, allInfo)
// })
// //r.GET("/phy", func(c *gin.Context) {
// // allInfo, _ := services.GetPVForLV()
// // c.JSON(http.StatusOK, allInfo)
// //})
// // Start the server; it listens on port 8098 (see r.Run below)
// r.Run(":8098")
// // 等待终止信号
// sigs := make(chan os.Signal, 1)
// signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
// <-sigs
// wdd_log.Info("正在关闭服务...")
// }

View File

@@ -1,16 +1,20 @@
package cmd
package main
import (
"cmii-uav-watchdog-common/wdd_log"
"flag"
"log"
"os"
"os/exec"
"os/signal"
"strings"
"sync"
"syscall"
"time"
)
// DebugMode reports whether debug mode is enabled. It defaults to false.
var DebugMode bool
var (
businessProgramType = flag.String("business-program-type", "", "Type of business program (java or python)")
businessProgramPath = flag.String("business-program-path", "", "Path to the business program file")
@@ -19,45 +23,221 @@ var (
mu sync.Mutex
)
// Java launch parameters.
//
// These are the built-in defaults; initConfig overrides imageName, workpath,
// custJavaOpts, envJvmTimezone, k8sNamespace, applicationName, the nacos*
// settings and the two config groups from the environment.
var (
	podName      = "unknown"
	imageName    = "unknown" // overridden by IMAGE_NAME
	imageVersion = "unknown" // derived from the tag of imageName

	workpath       = "/cmii"
	custJavaOpts   = "-Xms200m -Xmx1500m -Djava.awt.headless=true -Dlog4j2.formatMsgNoLookups=true "
	envJvmTimezone = "Asia/Shanghai"

	k8sNamespace    = "default"
	applicationName = "app"

	// Nacos connection settings, passed to the Java process as Spring arguments.
	nacosRegistry      = "helm-nacos:8848"
	nacosDiscoveryIp   = "127.0.0.1"
	nacosDiscoveryPort = "8080"
	nacosUsername      = "nacos"
	nacosPassword      = "nacos"

	// Nacos config groups for the application-specific and shared configuration.
	bizConfigGroup = "wdd-biz"
	sysConfigGroup = "wdd-sys"
)
// init enables DebugMode when the CMII_DEBUG_MODE environment variable carries
// the exact value "WDD_DEBUG", then logs the resulting state.
func init() {
	if os.Getenv("CMII_DEBUG_MODE") == "WDD_DEBUG" {
		DebugMode = true
	}

	wdd_log.Info("DebugMode 是否开启: %v", DebugMode)
}
// initConfig loads the launch settings from the environment and logs them.
//
// Every package-level setting keeps its built-in default unless the matching
// environment variable is set to a non-empty value. imageVersion is taken from
// the tag of IMAGE_NAME. If a config group ends up empty it falls back to the
// leading "-"-separated part of the image version and is exported back into
// the environment. The Nacos password is masked in the log output.
func initConfig() {
	imageName = getEnvOrDefault("IMAGE_NAME", imageName)
	if imageName != "unknown" {
		// Only a colon after the last "/" introduces a tag; an earlier colon
		// belongs to a registry host:port such as "registry:5000/app".
		if tagSep := strings.LastIndex(imageName, ":"); tagSep > strings.LastIndex(imageName, "/") {
			imageVersion = imageName[tagSep+1:]
		}
	}

	workpath = getEnvOrDefault("WORKPATH", workpath)
	custJavaOpts = getEnvOrDefault("CUST_JAVA_OPTS", custJavaOpts)
	envJvmTimezone = getEnvOrDefault("ENV_JVM_TIMEZONE", envJvmTimezone)
	k8sNamespace = getEnvOrDefault("K8S_NAMESPACE", k8sNamespace)
	applicationName = getEnvOrDefault("APPLICATION_NAME", applicationName)
	nacosRegistry = getEnvOrDefault("NACOS_REGISTRY", nacosRegistry)
	nacosDiscoveryIp = getEnvOrDefault("NACOS_DISCOVERY_IP", nacosDiscoveryIp)
	nacosDiscoveryPort = getEnvOrDefault("NACOS_DISCOVERY_PORT", nacosDiscoveryPort)
	nacosUsername = getEnvOrDefault("NACOS_USERNAME", nacosUsername)
	nacosPassword = getEnvOrDefault("NACOS_PASSWORD", nacosPassword)

	// Fallback config group: the part of the image version before the first "-".
	defaultConfigGroup := "default"
	if imageVersion != "unknown" {
		// strings.Split always yields at least one element, so index 0 is safe.
		defaultConfigGroup = strings.Split(imageVersion, "-")[0]
	}

	// NOTE(review): getEnvOrDefault never returns "" while the built-in
	// defaults of bizConfigGroup/sysConfigGroup are non-empty, so the two
	// fallbacks below are currently unreachable — confirm whether the
	// image-version-derived group was meant to be the default.
	bizConfigGroup = getEnvOrDefault("BIZ_CONFIG_GROUP", bizConfigGroup)
	if bizConfigGroup == "" {
		wdd_log.Info("[CONTAINER] As BIZ_CONFIG_GROUP is null, it set default value [%s]", defaultConfigGroup)
		bizConfigGroup = defaultConfigGroup
		if err := os.Setenv("BIZ_CONFIG_GROUP", bizConfigGroup); err != nil {
			wdd_log.Error("[CONTAINER] failed to export BIZ_CONFIG_GROUP: %v", err)
		}
	}

	sysConfigGroup = getEnvOrDefault("SYS_CONFIG_GROUP", sysConfigGroup)
	if sysConfigGroup == "" {
		wdd_log.Info("[CONTAINER] As SYS_CONFIG_GROUP is null, it set default value [%s]", defaultConfigGroup)
		sysConfigGroup = defaultConfigGroup
		if err := os.Setenv("SYS_CONFIG_GROUP", sysConfigGroup); err != nil {
			wdd_log.Error("[CONTAINER] failed to export SYS_CONFIG_GROUP: %v", err)
		}
	}

	// Log the effective configuration.
	wdd_log.Info("[CONTAINER] %s image is running ...", imageName)
	wdd_log.Info("[CONTAINER] IMAGE_VERSION is %s", imageVersion)
	wdd_log.Info("[CONTAINER] WORKPATH is %s", workpath)
	wdd_log.Info("[CONTAINER] CUST_JAVA_OPTS is %s", custJavaOpts)
	wdd_log.Info("[CONTAINER] JVM_TIMEZONE is %s", envJvmTimezone)
	wdd_log.Info("[CONTAINER] K8S_NAMESPACE is %s", k8sNamespace)
	wdd_log.Info("[CONTAINER] APPLICATION_NAME is %s", applicationName)
	wdd_log.Info("[CONTAINER] NACOS_REGISTRY is %s", nacosRegistry)
	wdd_log.Info("[CONTAINER] NACOS_DISCOVERY_IP is %s", nacosDiscoveryIp)
	wdd_log.Info("[CONTAINER] NACOS_DISCOVERY_PORT is %s", nacosDiscoveryPort)
	wdd_log.Info("[CONTAINER] NACOS_USERNAME is %s", nacosUsername)
	// Never write the credential itself to the log.
	wdd_log.Info("[CONTAINER] NACOS_PASSWORD is %s", "******")
	wdd_log.Info("[CONTAINER] BIZ_CONFIG_GROUP is %s", bizConfigGroup)
	wdd_log.Info("[CONTAINER] SYS_CONFIG_GROUP is %s", sysConfigGroup)
	wdd_log.Info("[CONTAINER] starting...")
}
// getEnvOrDefault returns the value of the environment variable key, or
// defaultValue when the variable is unset or empty.
func getEnvOrDefault(key, defaultValue string) string {
	if fromEnv := os.Getenv(key); fromEnv != "" {
		return fromEnv
	}
	return defaultValue
}
// startBusinessProcess builds the command that launches the business program.
// The command is returned without being started.
//
// programType selects the launcher:
//   - "java": runs initConfig, then builds `java <CUST_JAVA_OPTS> -jar
//     programPath <spring/nacos arguments>`.
//   - "python": builds `python programPath`.
//
// Any other programType is fatal: it is logged and the process exits with
// status 1, so a nil command is never handed back to the caller.
func startBusinessProcess(programType, programPath string) *exec.Cmd {
	switch programType {
	case "java":
		// Load the configuration the arguments below are built from.
		initConfig()

		// JVM options come first; splitArgs yields no empty entries.
		args := splitArgs(custJavaOpts)

		// The main JAR file.
		args = append(args, "-jar", programPath)

		// Everything after the JAR is passed to the application.
		// NOTE(review): "-Dfile.encoding=UTF-8" sits after -jar, so the java
		// launcher hands it to the application as a program argument instead
		// of applying it as a JVM option — confirm whether it should be moved
		// in front of -jar.
		args = append(args,
			"--user.timezone="+envJvmTimezone,
			"-Dfile.encoding=UTF-8",
			"--spring.main.allow-bean-definition-overriding=true",
			"--spring.application.name="+applicationName,
			"--spring.cloud.nacos.username="+nacosUsername,
			"--spring.cloud.nacos.password="+nacosPassword,
			"--spring.cloud.nacos.config.server-addr="+nacosRegistry,
			"--spring.cloud.nacos.config.extension-configs[0].data-id="+applicationName+".yml",
			"--spring.cloud.nacos.config.extension-configs[0].group="+bizConfigGroup,
			"--spring.cloud.nacos.config.extension-configs[0].refresh=true",
			"--spring.cloud.nacos.config.shared-configs[0].data-id=cmii-backend-system.yml",
			"--spring.cloud.nacos.config.shared-configs[0].group="+sysConfigGroup,
			"--spring.cloud.nacos.config.shared-configs[0].refresh=true",
			"--spring.cloud.nacos.discovery.server-addr="+nacosRegistry,
			"--spring.cloud.nacos.discovery.ip="+nacosDiscoveryIp,
			"--spring.cloud.nacos.discovery.port="+nacosDiscoveryPort,
		)

		// Log a redacted copy so the Nacos password never reaches the log.
		wdd_log.Info("[CONTAINER] java args: %v", redactJavaArgs(args))
		return exec.Command("java", args...)
	case "python":
		return exec.Command("python", programPath)
	default:
		wdd_log.Error("不支持的业务程序类型: %s", programType)
		os.Exit(1)
		return nil // unreachable; satisfies the compiler
	}
}

// redactJavaArgs returns a copy of args that is safe to log: the value of the
// Nacos password option is replaced by a mask. args itself is not modified.
func redactJavaArgs(args []string) []string {
	const passwordOpt = "--spring.cloud.nacos.password="

	safe := make([]string, len(args))
	for i, arg := range args {
		if strings.HasPrefix(arg, passwordOpt) {
			arg = passwordOpt + "******"
		}
		safe[i] = arg
	}
	return safe
}
// splitArgs splits a shell-like option string into individual arguments.
//
// Arguments are separated by unquoted whitespace (space, tab, carriage return
// or line feed). Text wrapped in single or double quotes is kept together as
// part of one argument and the surrounding quote characters are dropped; the
// other kind of quote inside a quoted section is kept literally. A quote that
// is never closed runs to the end of the input. Empty arguments are never
// produced, so blank input yields a nil slice.
func splitArgs(s string) []string {
	var (
		args      []string
		current   strings.Builder
		quoteChar rune // the open quote character, 0 while outside quotes
	)

	// flush ends the argument being built, if there is one.
	flush := func() {
		if current.Len() > 0 {
			args = append(args, current.String())
			current.Reset()
		}
	}

	for _, r := range s {
		switch {
		case quoteChar != 0:
			// Inside quotes only the matching quote is special.
			if r == quoteChar {
				quoteChar = 0
			} else {
				current.WriteRune(r)
			}
		case r == '"' || r == '\'':
			quoteChar = r
		case r == ' ' || r == '\t' || r == '\n' || r == '\r':
			flush()
		default:
			current.WriteRune(r)
		}
	}
	flush()

	return args
}
func main() {
// 解析命令行参数
flag.Parse()
if *businessProgramType == "" || *businessProgramPath == "" {
log.Fatal("Missing required flags: -business-program-type and -business-program-path must be specified")
wdd_log.Error("缺少必要的参数: -business-program-type -business-program-path 必须指定")
}
// 信号处理
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM)
// StartHeartbeatDetection(signalChan)
go func() {
for sig := range signalChan {
log.Printf("Received signal: %v", sig)
wdd_log.Info("接收到信号: %v", sig)
mu.Lock()
stopRequested = true
if currentCmd != nil && currentCmd.Process != nil {
// 发送 SIGTERM 给业务进程
if err := currentCmd.Process.Signal(syscall.SIGTERM); err != nil {
log.Printf("Failed to send SIGTERM to process: %v", err)
wdd_log.Error("向进程发送SIGTERM信号失败: %v", err)
}
// 等待 10 秒后强制杀死进程
time.AfterFunc(10*time.Second, func() {
mu.Lock()
defer mu.Unlock()
if currentCmd != nil && currentCmd.Process != nil {
log.Println("Graceful shutdown timeout, sending SIGKILL")
wdd_log.Warn("优雅关闭超时,发送SIGKILL信号")
currentCmd.Process.Kill()
}
})
@@ -66,12 +246,19 @@ func main() {
}
}()
// 授权检测
go func() {
for {
StartHeartbeatDetection(signalChan)
}
}()
// 主循环
for {
mu.Lock()
if stopRequested {
mu.Unlock()
log.Println("Shutting down due to stop request")
wdd_log.Info("收到停止请求,正在关闭")
os.Exit(0)
}
mu.Unlock()
@@ -82,11 +269,16 @@ func main() {
// 启动业务进程
if err := cmd.Start(); err != nil {
log.Printf("Failed to start business process: %v", err)
wdd_log.Error("启动业务进程失败: %v", err)
time.Sleep(5 * time.Second)
continue
}
// 业务进程启动成功
if *businessProgramType == "java" {
wdd_log.Info("[CONTAINER] 程序启动成功!")
}
mu.Lock()
currentCmd = cmd
mu.Unlock()
@@ -98,21 +290,21 @@ func main() {
mu.Unlock()
if err != nil {
log.Printf("Business process exited with error: %v", err)
wdd_log.Error("业务进程异常退出: %v", err)
} else {
log.Println("Business process exited normally")
wdd_log.Info("业务进程正常退出")
}
mu.Lock()
if stopRequested {
mu.Unlock()
log.Println("Shutting down due to stop request")
wdd_log.Info("收到停止请求,正在关闭")
os.Exit(0)
}
mu.Unlock()
// 等待 5 秒后重启
log.Println("Restarting business process in 5 seconds...")
wdd_log.Info("5秒后重启业务进程...")
time.Sleep(5 * time.Second)
}
}

View File

@@ -1,76 +1,95 @@
package cmd
package main
import (
"cmii-uav-watchdog-agent/host_info"
"cmii-uav-watchdog-agent/rpc"
"cmii-uav-watchdog-agent/totp"
"cmii-uav-watchdog-common/models"
"fmt"
"log"
"cmii-uav-watchdog-common/totp_tier_two"
"cmii-uav-watchdog-common/wdd_log"
"os"
"os/signal"
"syscall"
"time"
)
const (
var (
// 最大重试次数
maxRetryCount = 5
maxRetryCount = 12
// 默认心跳检测间隔
defaultHeartbeatInterval = 30 * time.Second
defaultHeartbeatInterval = 2 * time.Hour
// 检测失败后的等待间隔
failWaitInterval = 5 * time.Second
// 环境变量名称
appNameEnv = "APP_NAME"
failWaitInterval = 1 * time.Hour
)
// 启动心跳检测
func StartHeartbeatDetection() {
log.Println("启动心跳检测任务...")
var tierTwoTotpSecret = ""
// StartHeartbeatDetection 启动心跳检测
func StartHeartbeatDetection(signalChan chan os.Signal) {
wdd_log.Info("启动心跳检测任务...")
// variable
var err error
// 如果Debug模式那么使用环境变量赋值
if DebugMode {
// If the environment variable is set, use its value instead of the default
heartbeatInterval := os.Getenv("WATCHDOG_AGENT_HEARTBEAT_INTERVAL")
if heartbeatInterval != "" {
defaultHeartbeatInterval, err = time.ParseDuration(heartbeatInterval)
if err != nil {
wdd_log.Error("无法解析环境变量: %v", err)
}
wdd_log.Info("已更新心跳检测间隔 => %s", defaultHeartbeatInterval)
}
failWaitIntervalEnv := os.Getenv("WATCHDOG_AGENT_FAIL_WAIT_INTERVAL")
if failWaitIntervalEnv != "" {
failWaitInterval, err = time.ParseDuration(failWaitIntervalEnv)
if err != nil {
wdd_log.Error("无法解析环境变量: %v", err)
}
wdd_log.Info("已更新心跳检测失败等待间隔 => %s", failWaitInterval)
}
//
}
// 创建RPC客户端
client := rpc.NewClient(nil)
heartbeatURL := os.Getenv("WATCHDOG_HEARTBEAT_URL")
client := rpc.NewClient(nil, heartbeatURL)
// 监听终止信号
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan, os.Interrupt, syscall.SIGTERM)
wdd_log.Info("心跳检测URL: %s", client.GetHeartbeatURL())
// 失败计数器
failCount := 0
failCount := 1
// 心跳检测循环
for {
select {
case <-signalChan:
log.Println("收到终止信号,停止心跳检测")
wdd_log.Info("收到终止信号,停止心跳检测")
return
default:
// 尝试发送心跳请求
authorized, err := sendHeartbeat(client)
if err != nil {
log.Printf("心跳检测失败: %v", err)
wdd_log.Error("第 %d 次心跳检测失败: %v", failCount, err)
failCount++
} else if !authorized {
log.Println("未获得授权")
wdd_log.Warn("第 %d 次心跳检测未获得授权", failCount)
failCount++
} else {
// 检测成功,重置失败计数
failCount = 0
log.Println("心跳检测成功,已获得授权")
failCount = 1
wdd_log.Info("第 %d 次心跳检测成功,已获得授权", failCount)
}
// 检查是否达到最大失败次数
if failCount >= maxRetryCount {
log.Printf("心跳检测连续失败 %d 次,发送终止信号", failCount)
// 发送终止信号给start_up.go
process, err := os.FindProcess(os.Getpid())
if err == nil {
process.Signal(syscall.SIGTERM)
}
wdd_log.Fatal("心跳检测连续失败 %d 次,发送终止信号", failCount)
signalChan <- syscall.SIGTERM
return
}
@@ -88,8 +107,9 @@ func StartHeartbeatDetection() {
// 发送心跳请求
func sendHeartbeat(client *rpc.Client) (bool, error) {
// 1. 获取主机信息
hostInfoData := services.GetAllInfo()
hostInfoData := host_info.GetAllInfo()
hostInfo := models.HostInfo{
SystemInfo: hostInfoData.SystemInfo,
CPUInfo: hostInfoData.CPUInfo,
@@ -98,26 +118,21 @@ func sendHeartbeat(client *rpc.Client) (bool, error) {
NetInfo: hostInfoData.NetInfo,
}
// 2. 获取应用名称
appName := os.Getenv(appNameEnv)
if appName == "" {
appName = "unknown-app"
log.Printf("警告: 环境变量 %s 未设置,使用默认值: %s", appNameEnv, appName)
}
// 获取环境信息
envInfo := GetEnvInfo()
// 构建心跳请求
request := &models.HeartbeatRequest{
HostInfo: hostInfo,
Timestamp: time.Now().Unix(),
AppName: appName,
EnvInfo: envInfo,
}
// 3. 如果已有TOTP密钥则生成TOTP验证码
totpSecret := totp.GetTOTPSecret()
if totpSecret != "" {
totpCode, err := totp.GenerateTOTPCode()
if tierTwoTotpSecret != "" {
totpCode, err := totp_tier_two.GenerateTierTwoTOTPCode(tierTwoTotpSecret)
if err != nil {
log.Printf("生成TOTP验证码失败: %v", err)
wdd_log.Error("生成TOTP验证码失败: %v", err)
} else {
request.TOTPCode = totpCode
}
@@ -126,20 +141,24 @@ func sendHeartbeat(client *rpc.Client) (bool, error) {
// 4. 发送心跳请求
response, err := client.SendHeartbeatWithRetry(request, 10*time.Second)
if err != nil {
return false, fmt.Errorf("发送心跳请求失败: %w", err)
return false, err
}
if response == nil {
return false, err
}
// 5. 处理响应
if response.SecondTOTPSecret != "" {
// 存储TOTP密钥
totp.SetTOTPSecret(response.SecondTOTPSecret)
log.Println("已更新TOTP密钥")
tierTwoTotpSecret = response.SecondTOTPSecret
wdd_log.Info("已更新TOTP密钥 => %s", tierTwoTotpSecret)
}
// 6. 如果有TOTP验证码进行验证
if response.TOTPCode != "" && totpSecret != "" {
if !totp.ValidateTOTPCode(response.TOTPCode) {
log.Println("TOTP验证码验证失败")
if response.TOTPCode != "" && tierTwoTotpSecret != "" {
if !totp_tier_two.VerifyTierTwoTOTPCode(response.TOTPCode, tierTwoTotpSecret) {
wdd_log.Warn("TOTP验证码验证失败")
return false, nil
}
}