[ Status ] Add an asynchronous way to get agent status
This commit is contained in:
@@ -1,17 +1,17 @@
|
||||
package io.wdd.rpc.scheduler.job;
|
||||
|
||||
import io.wdd.rpc.scheduler.config.QuartzLogOperator;
|
||||
import io.wdd.rpc.scheduler.service.status.CheckAgentAliveStatus;
|
||||
import io.wdd.rpc.scheduler.service.status.AgentAliveStatusMonitorService;
|
||||
import org.quartz.JobExecutionContext;
|
||||
import org.quartz.JobExecutionException;
|
||||
import org.springframework.scheduling.quartz.QuartzJobBean;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
public class AgentStatusMonitorJob extends QuartzJobBean {
|
||||
public class AgentAliveStatusMonitorJob extends QuartzJobBean {
|
||||
|
||||
@Resource
|
||||
CheckAgentAliveStatus checkAgentAliveStatus;
|
||||
AgentAliveStatusMonitorService agentAliveStatusMonitorService;
|
||||
|
||||
@Resource
|
||||
QuartzLogOperator quartzLogOperator;
|
||||
@@ -23,7 +23,7 @@ public class AgentStatusMonitorJob extends QuartzJobBean {
|
||||
//JobDataMap jobDataMap = jobExecutionContext.getJobDetail().getJobDataMap();
|
||||
|
||||
// actually execute the monitor service
|
||||
checkAgentAliveStatus.go();
|
||||
agentAliveStatusMonitorService.go();
|
||||
|
||||
// log to somewhere
|
||||
quartzLogOperator.save();
|
||||
@@ -1,8 +1,8 @@
|
||||
package io.wdd.rpc.scheduler.service;
|
||||
|
||||
|
||||
import io.wdd.rpc.scheduler.job.AgentAliveStatusMonitorJob;
|
||||
import io.wdd.rpc.scheduler.job.AgentRunMetricStatusJob;
|
||||
import io.wdd.rpc.scheduler.job.AgentStatusMonitorJob;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.quartz.CronExpression;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
@@ -103,7 +103,7 @@ public class BuildStatusScheduleTask {
|
||||
|
||||
// build the Job
|
||||
octopusQuartzService.addMission(
|
||||
AgentStatusMonitorJob.class,
|
||||
AgentAliveStatusMonitorJob.class,
|
||||
"monitorAllAgentStatusJob",
|
||||
JOB_GROUP_NAME,
|
||||
healthyCheckStartDelaySeconds,
|
||||
|
||||
@@ -4,6 +4,7 @@ import io.wdd.common.utils.TimeUtils;
|
||||
import io.wdd.rpc.init.AgentStatusCacheService;
|
||||
import io.wdd.rpc.scheduler.service.BuildStatusScheduleTask;
|
||||
import io.wdd.rpc.status.OctopusStatusMessage;
|
||||
import io.wdd.rpc.status.service.AsyncStatusService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
@@ -13,8 +14,7 @@ import org.springframework.stereotype.Service;
|
||||
import javax.annotation.Resource;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static io.wdd.rpc.init.AgentStatusCacheService.ALL_AGENT_TOPIC_NAME_LIST;
|
||||
@@ -38,7 +38,7 @@ import static io.wdd.rpc.status.OctopusStatusMessage.HEALTHY_STATUS_MESSAGE_TYPE
|
||||
@Service
|
||||
@Slf4j
|
||||
@Lazy
|
||||
public class CheckAgentAliveStatus {
|
||||
public class AgentAliveStatusMonitorService {
|
||||
|
||||
private static final int MAX_WAIT_AGENT_REPORT_STATUS_TIME = 5;
|
||||
@Resource
|
||||
@@ -52,38 +52,35 @@ public class CheckAgentAliveStatus {
|
||||
@Resource
|
||||
BuildStatusScheduleTask buildStatusScheduleTask;
|
||||
|
||||
@Resource
|
||||
AsyncStatusService asyncStatusService;
|
||||
|
||||
|
||||
private HashMap<String, String> AGENT_HEALTHY_INIT_MAP;
|
||||
|
||||
public void go() {
|
||||
|
||||
try {
|
||||
// 1. 获取所有注册的Agent 手动更新
|
||||
agentStatusCacheService.updateAllAgentTopicNameCache();
|
||||
if (CollectionUtils.isEmpty(ALL_AGENT_TOPIC_NAME_LIST)) {
|
||||
log.warn("[Scheduler] No Agent Registered ! End Up Status Monitor !");
|
||||
return;
|
||||
}
|
||||
|
||||
// 1.1 检查 Agent状态保存数据结构是否正常
|
||||
checkOrCreateRedisHealthyKey();
|
||||
|
||||
// 2.发送状态检查信息, agent需要update相应的HashMap的值
|
||||
// 2023年6月14日 2. 发送ping等待所有的Agent返回PONG, 然后进行redis的状态修改
|
||||
CountDownLatch aliveStatusCDL = new CountDownLatch(ALL_AGENT_TOPIC_NAME_LIST.size());
|
||||
|
||||
|
||||
buildAndSendAgentHealthMessage();
|
||||
|
||||
// 3. 休眠 MAX_WAIT_AGENT_REPORT_STATUS_TIME 秒 等待agent的状态上报
|
||||
TimeUnit.SECONDS.sleep(MAX_WAIT_AGENT_REPORT_STATUS_TIME);
|
||||
|
||||
// 4.检查相应的 状态HashMap,然后全部置为零
|
||||
// todo 存储到某个地方,目前只是打印日志
|
||||
updateAllAgentHealthyStatus();
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
// 1. 获取所有注册的Agent 手动更新
|
||||
agentStatusCacheService.updateAllAgentTopicNameCache();
|
||||
if (CollectionUtils.isEmpty(ALL_AGENT_TOPIC_NAME_LIST)) {
|
||||
log.warn("[Scheduler] No Agent Registered ! End Up Status Monitor !");
|
||||
return;
|
||||
}
|
||||
|
||||
// 1.1 检查 Agent状态保存数据结构是否正常
|
||||
checkOrCreateRedisHealthyKey();
|
||||
|
||||
// 2.发送状态检查信息, agent需要update相应的HashMap的值
|
||||
// 2023年6月14日 2. 发送ping等待所有的Agent返回PONG, 然后进行redis的状态修改
|
||||
|
||||
// 使用同步更新的策略
|
||||
Map<String, Boolean> agentAliveStatusMap = asyncStatusService.AsyncCollectAgentAliveStatus(
|
||||
ALL_AGENT_TOPIC_NAME_LIST,
|
||||
5
|
||||
);
|
||||
|
||||
// 更新Agent的状态
|
||||
updateAllAgentHealthyStatus(agentAliveStatusMap);
|
||||
}
|
||||
|
||||
private void checkOrCreateRedisHealthyKey() {
|
||||
@@ -129,8 +126,7 @@ public class CheckAgentAliveStatus {
|
||||
.map(
|
||||
agentTopicName -> OctopusStatusMessage
|
||||
.builder()
|
||||
.agentTopicName(agentTopicName)
|
||||
.type(HEALTHY_STATUS_MESSAGE_TYPE)
|
||||
.statusType(HEALTHY_STATUS_MESSAGE_TYPE)
|
||||
.build()
|
||||
)
|
||||
.collect(Collectors.toList());
|
||||
@@ -139,15 +135,15 @@ public class CheckAgentAliveStatus {
|
||||
collectAgentStatus.statusMessageToAgent(collect);
|
||||
}
|
||||
|
||||
private void updateAllAgentHealthyStatus() {
|
||||
private void updateAllAgentHealthyStatus(Map<String, Boolean> agentAliveStatusMap) {
|
||||
|
||||
String currentTimeString = TimeUtils.currentTimeString();
|
||||
|
||||
// 更新所有的缓存状态
|
||||
agentStatusCacheService.updateAgentStatusMapCache();
|
||||
agentStatusCacheService.updateAgentStatusMapCache(agentAliveStatusMap);
|
||||
|
||||
// 执行Metric上报定时任务
|
||||
buildStatusScheduleTask.buildAgentMetricScheduleTask();
|
||||
// buildStatusScheduleTask.buildAgentMetricScheduleTask();
|
||||
|
||||
// 这里仅仅是更新时间
|
||||
redisTemplate
|
||||
@@ -158,6 +154,14 @@ public class CheckAgentAliveStatus {
|
||||
currentTimeString
|
||||
);
|
||||
|
||||
// 更新所有的Agent状态
|
||||
redisTemplate
|
||||
.opsForHash()
|
||||
.putAll(
|
||||
ALL_AGENT_STATUS_REDIS_KEY,
|
||||
agentAliveStatusMap
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ public class AgentRuntimeMetricStatus {
|
||||
agentTopicName -> {
|
||||
return OctopusStatusMessage
|
||||
.builder()
|
||||
.type(METRIC_STATUS_MESSAGE_TYPE)
|
||||
.statusType(METRIC_STATUS_MESSAGE_TYPE)
|
||||
.metricRepeatCount(metricRepeatCount)
|
||||
.metricRepeatPinch(metricRepeatPinch)
|
||||
.agentTopicName(agentTopicName)
|
||||
|
||||
Reference in New Issue
Block a user