[ server ] [ status ]- update some code - 1

This commit is contained in:
zeaslity
2023-02-03 11:30:20 +08:00
parent 0bd0c9da53
commit fd5f2607b9
12 changed files with 362 additions and 223 deletions

View File

@@ -11,6 +11,7 @@ import java.util.List;
import java.util.stream.Collectors;
import static io.wdd.common.beans.status.OctopusStatusMessage.METRIC_STATUS_MESSAGE_TYPE;
import static io.wdd.rpc.init.ServerBootUpEnvironment.ALL_HEALTHY_AGENT_TOPIC_NAME_LIST;
/**
* 收集OctopusAgent的运行Metric信息
@@ -21,8 +22,6 @@ import static io.wdd.common.beans.status.OctopusStatusMessage.METRIC_STATUS_MESS
@Slf4j
public class AgentRuntimeMetricStatus {
public static List<String> ALL_HEALTHY_AGENT_TOPIC_NAMES;
public static final String METRIC_REPORT_TIME_PINCH = "metricRepeatPinch";
public static final String METRIC_REPORT_TIMES_COUNT = "metricRepeatCount";
@@ -32,29 +31,35 @@ public class AgentRuntimeMetricStatus {
/**
 * Checks the list of healthy agent topic names and then hands both arguments to
 * buildMetricStatusMessageAndSend.
 *
 * NOTE(review): this span looks like a rendered diff that shows both the old and the new copy
 * of some lines (two consecutive emptiness checks, two forms of the same call) — confirm which
 * copy is current before changing anything here.
 *
 * @param metricRepeatCount passed through unchanged to buildMetricStatusMessageAndSend
 * @param metricRepeatPinch passed through unchanged to buildMetricStatusMessageAndSend
 */
public void collect(int metricRepeatCount, int metricRepeatPinch) {
// Check the basic prerequisites
if (CollectionUtils.isEmpty(ALL_HEALTHY_AGENT_TOPIC_NAMES)) {
if (CollectionUtils.isEmpty(ALL_HEALTHY_AGENT_TOPIC_NAME_LIST)) {
// NOTE(review): the empty case is only logged; there is no return, so the send below still
// runs. The message text also still names ALL_HEALTHY_AGENT_TOPIC_NAMES.
log.error("Metric Status Collect Failed ! no ALL_HEALTHY_AGENT_TOPIC_NAMES");
}
// Build the OctopusMessage
// Send the message only once and let the Agent run the task periodically in a loop
buildMetricStatusMessageAndSend(metricRepeatCount, metricRepeatPinch);
// Send the message only once and let the Agent run the task periodically in a loop
buildMetricStatusMessageAndSend(
metricRepeatCount,
metricRepeatPinch
);
//
}
private void buildMetricStatusMessageAndSend(int metricRepeatCount, int metricRepeatPinch) {
List<OctopusStatusMessage> collect = ALL_HEALTHY_AGENT_TOPIC_NAMES.stream()
List<OctopusStatusMessage> collect = ALL_HEALTHY_AGENT_TOPIC_NAME_LIST
.stream()
.map(
agentTopicName -> {
return OctopusStatusMessage.builder()
return OctopusStatusMessage
.builder()
.type(METRIC_STATUS_MESSAGE_TYPE)
.metricRepeatCount(metricRepeatCount)
.metricRepeatPinch(metricRepeatPinch)
.agentTopicName(agentTopicName)
.build();
}
).collect(Collectors.toList());
)
.collect(Collectors.toList());
// send to the next level
collectAgentStatus.statusMessageToAgent(collect);

View File

@@ -3,22 +3,24 @@ package io.wdd.rpc.scheduler.service.status;
import io.wdd.common.beans.status.AgentHealthyStatusEnum;
import io.wdd.common.beans.status.OctopusStatusMessage;
import io.wdd.common.utils.TimeUtils;
import io.wdd.rpc.init.ServerBootUpEnvironment;
import io.wdd.rpc.scheduler.service.BuildStatusScheduleTask;
import io.wdd.server.beans.vo.ServerInfoVO;
import io.wdd.server.coreService.CoreServerService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Lazy;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;
import javax.annotation.Resource;
import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import static io.wdd.common.beans.status.OctopusStatusMessage.ALL_AGENT_STATUS_REDIS_KEY;
import static io.wdd.common.beans.status.OctopusStatusMessage.HEALTHY_STATUS_MESSAGE_TYPE;
import static io.wdd.rpc.scheduler.service.status.AgentRuntimeMetricStatus.ALL_HEALTHY_AGENT_TOPIC_NAMES;
import static io.wdd.rpc.init.ServerBootUpEnvironment.ALL_AGENT_TOPIC_NAME_LIST;
import static io.wdd.rpc.init.ServerBootUpEnvironment.STATUS_AGENT_LIST_MAP;
/**
* 更新频率被类 BuildStatusScheduleTask.class控制
@@ -36,58 +38,27 @@ import static io.wdd.rpc.scheduler.service.status.AgentRuntimeMetricStatus.ALL_H
*/
@Service
@Slf4j
@Lazy
public class MonitorAllAgentStatus {
/**
* Map from a status to the list of Agents in that status.
* The Agent status is described by AgentHealthyStatusEnum
* HEALTHY -> ["agentTopicName-1" "agentTopicName-2"]
* FAILED -> ["agentTopicName-1" "agentTopicName-2"]
*/
public static final Map<String, List<String>> HEALTHY_STATUS_AGENT_LIST_MAP = new HashMap<>();
/**
* Map that stores the status of every Agent.
* <p>
* Entries are agentTopicName - health status
*/
public static final Map<String, String> ALL_AGENT_HEALTHY_STATUS_MAP = new HashMap<>();
/**
* Cache of all AgentTopicName values
*/
public static final Set<String> ALL_AGENT_TOPIC_NAME_SET = new HashSet<>();
// NOTE(review): unit is not visible in this view — confirm against the usage site.
private static final int MAX_WAIT_AGENT_REPORT_STATUS_TIME = 5;
@Resource
RedisTemplate redisTemplate;
@Resource
CollectAgentStatus collectAgentStatus;
@Resource
CoreServerService coreServerService;
ServerBootUpEnvironment serverBootUpEnvironment;
@Resource
BuildStatusScheduleTask buildStatusScheduleTask;
// Topic names of all Agents; assigned in go() from the registered server list.
private List<String> ALL_AGENT_TOPIC_NAME_LIST;
// Cached init map (agentTopicName -> "0" plus "updateTime"); null until first built.
private HashMap<String, String> AGENT_HEALTHY_INIT_MAP;
public void go() {
try {
// 1. 获取所有注册的Agent
// todo need to cache this
List<ServerInfoVO> allAgentInfo = coreServerService.serverGetAll();
Assert.notEmpty(
allAgentInfo,
"not agent registered ! skip the agent healthy status check !"
);
ALL_AGENT_TOPIC_NAME_LIST = allAgentInfo
.stream()
.map(ServerInfoVO::getTopicName)
.collect(Collectors.toList());
// 2023-01-16
ALL_AGENT_TOPIC_NAME_SET.clear();
ALL_AGENT_TOPIC_NAME_SET.addAll(ALL_AGENT_TOPIC_NAME_LIST);
// 1. 获取所有注册的Agent 手动更新
// 1.1 检查 Agent状态保存数据结构是否正常
checkOrCreateRedisHealthyKey();
@@ -108,39 +79,38 @@ public class MonitorAllAgentStatus {
/**
 * Makes sure the cached init map and the Redis hash stored under ALL_AGENT_STATUS_REDIS_KEY
 * both exist.
 *
 * When AGENT_HEALTHY_INIT_MAP is null or Redis reports that the key is missing, a map is built
 * that sets every name in ALL_AGENT_TOPIC_NAME_LIST to "0" and adds an "updateTime" entry; that
 * map is stored in AGENT_HEALTHY_INIT_MAP and written to Redis with putAll.
 *
 * NOTE(review): several statements below appear twice in a row — this text looks like a
 * rendered diff showing both the old and the re-indented new copy. Confirm which copy is
 * current.
 */
private void checkOrCreateRedisHealthyKey() {
// must init the cached map && make sure the redis key existed!
// NOTE(review): the hasKey result is negated directly; presumably it is a boxed Boolean that
// could be null in some RedisTemplate modes — confirm, since unboxing null would throw.
if (null == AGENT_HEALTHY_INIT_MAP || !redisTemplate.hasKey(ALL_AGENT_STATUS_REDIS_KEY)) {
log.info("ALL_AGENT_STATUS_REDIS_KEY not existed , start to create");
// At the start of a check, the status of every Agent has to be set to 0 manually
// An Agent that is alive can then change its own status to 1
// build the redis all agent healthy map struct
HashMap<String, String> initMap = new HashMap<>(32);
ALL_AGENT_TOPIC_NAME_LIST
.stream()
.forEach(
agentTopicName -> {
initMap.put(
agentTopicName,
"0"
);
}
);
// build the redis all agent healthy map struct
HashMap<String, String> initMap = new HashMap<>(32);
ALL_AGENT_TOPIC_NAME_LIST
.stream()
.forEach(
agentTopicName -> {
initMap.put(
agentTopicName,
"0"
);
}
);
initMap.put(
"updateTime",
TimeUtils.currentTimeString()
);
initMap.put(
"updateTime",
TimeUtils.currentTimeString()
);
// cache this map struct
AGENT_HEALTHY_INIT_MAP = initMap;
// cache this map struct
AGENT_HEALTHY_INIT_MAP = initMap;
// create the healthy redis structure
redisTemplate
.opsForHash()
.putAll(
ALL_AGENT_STATUS_REDIS_KEY,
initMap
);
// create the healthy redis structure
redisTemplate
.opsForHash()
.putAll(
ALL_AGENT_STATUS_REDIS_KEY,
initMap
);
}
}
private void buildAndSendAgentHealthMessage() {
@@ -161,94 +131,14 @@ public class MonitorAllAgentStatus {
/**
 * Reads the status of every name in ALL_AGENT_TOPIC_NAME_LIST from the Redis hash stored under
 * ALL_AGENT_STATUS_REDIS_KEY, refreshes the status caches, triggers the Agent metric schedule
 * task, and resets the entries of AGENT_HEALTHY_INIT_MAP to "0".
 *
 * NOTE(review): part of this method is hidden behind the "@@" hunk header near the end, and
 * the visible lines look like a rendered diff mixing the old and new implementation — confirm
 * which statements are current before relying on this description.
 */
private void updateAllAgentHealthyStatus() {
List statusList = redisTemplate
.opsForHash()
.multiGet(
ALL_AGENT_STATUS_REDIS_KEY,
ALL_AGENT_TOPIC_NAME_LIST
);
// current log to console is ok
// Stored as agentStatusMap ==> agent-topic-name : STATUS(healthy, failed, unknown)
HashMap<String, String> agentStatusMap = new HashMap<>(32);
// statusList is read by index, in the same order as ALL_AGENT_TOPIC_NAME_LIST
for (int i = 0; i < ALL_AGENT_TOPIC_NAME_LIST.size(); i++) {
agentStatusMap.put(
ALL_AGENT_TOPIC_NAME_LIST.get(i),
uniformHealthyStatus(String.valueOf(statusList.get(i)))
);
}
String currentTimeString = TimeUtils.currentTimeString();
log.info(
"[ AGENT HEALTHY CHECK ] time is {} , result are => {}",
currentTimeString,
agentStatusMap
);
// 2023-01-16
ALL_AGENT_HEALTHY_STATUS_MAP.clear();
ALL_AGENT_HEALTHY_STATUS_MAP.putAll(agentStatusMap);
// Update all cached status
serverBootUpEnvironment.updateAgentStatusMapCache();
// 2023-01-16
// Invert agentStatusMap: group the entries by status, then keep only the agent names per status
Map<String, List<String>> statusAgentListMap = agentStatusMap
.entrySet()
.stream()
.collect(
Collectors.groupingBy(
Map.Entry::getValue
)
)
.entrySet()
.stream()
.collect(
Collectors.toMap(
entry -> entry.getKey(),
entry -> entry
.getValue()
.stream()
.map(
Map.Entry::getKey
)
.collect(Collectors.toList())
)
);
// NOTE(review): putAll without a preceding clear(), unlike ALL_AGENT_HEALTHY_STATUS_MAP above;
// a status that has no agents in this round keeps its previous list — confirm this is intended.
HEALTHY_STATUS_AGENT_LIST_MAP.putAll(statusAgentListMap);
log.debug("Agent存活状态 状态-Agent名称-Map 已经更新了");
// help gc
agentStatusMap = null;
// Trigger the Agent Metric task
ArrayList<String> allHealthyAgentTopicNames = new ArrayList<>(32);
for (int i = 0; i < statusList.size(); i++) {
// NOTE(review): equals is called on the list element; presumably multiGet can yield null for
// a missing hash field, which would throw here — confirm.
if (statusList
.get(i)
.equals("1")) {
allHealthyAgentTopicNames.add(ALL_AGENT_TOPIC_NAME_LIST.get(i));
}
}
// Cache the corresponding alive Agents
ALL_HEALTHY_AGENT_TOPIC_NAMES = allHealthyAgentTopicNames;
// Run the Metric report task
// Run the scheduled Metric report task
buildStatusScheduleTask.buildAgentMetricScheduleTask();
// init the healthy map
// The status of every Agent has to be set to "0"
ALL_AGENT_TOPIC_NAME_LIST
.stream()
.forEach(
agentTopicName -> {
AGENT_HEALTHY_INIT_MAP.put(
agentTopicName,
"0"
);
}
);
// update time
AGENT_HEALTHY_INIT_MAP.put(
"updateTime",
@@ -261,17 +151,7 @@ public class MonitorAllAgentStatus {
ALL_AGENT_STATUS_REDIS_KEY,
AGENT_HEALTHY_INIT_MAP
);
}
/**
 * Maps a status string to the status text of the matching AgentHealthyStatusEnum constant.
 *
 * @param agentStatus status string; "0" selects FAILED, "1" selects HEALTHY, any other value
 *                    selects UNKNOWN
 * @return the getStatus() value of the selected constant
 */
private String uniformHealthyStatus(String agentStatus) {
    // equals is invoked on the argument itself so that a null argument still raises
    // NullPointerException, exactly as switching on a null String does.
    if (agentStatus.equals("0")) {
        return AgentHealthyStatusEnum.FAILED.getStatus();
    }
    if (agentStatus.equals("1")) {
        return AgentHealthyStatusEnum.HEALTHY.getStatus();
    }
    return AgentHealthyStatusEnum.UNKNOWN.getStatus();
}