初始化项目
This commit is contained in:
374
agent-common/SplitProject/ranjing-python-devfusion/check.py
Normal file
374
agent-common/SplitProject/ranjing-python-devfusion/check.py
Normal file
@@ -0,0 +1,374 @@
|
||||
import os
|
||||
import subprocess
|
||||
import paho.mqtt.client as mqtt
|
||||
import json
|
||||
import time
|
||||
import threading
|
||||
import logging
|
||||
from config import *
|
||||
import datetime
|
||||
import schedule # 需要先安装: pip install schedule
|
||||
import yaml
|
||||
|
||||
|
||||
# 读取yaml配置
|
||||
def load_mqtt_config():
|
||||
config_path = os.getenv('CONFIG_PATH', 'config.yaml')
|
||||
with open(config_path, 'r') as f:
|
||||
config = yaml.safe_load(f)
|
||||
return config['mqtt'], config['topics']
|
||||
|
||||
|
||||
# 获取MQTT和topics配置
|
||||
mqtt_config, topics_config = load_mqtt_config()
|
||||
|
||||
# 设置日志配置
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler('check.log'),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
|
||||
# 存储运行中的任务及其配置
|
||||
running_tasks = {}
|
||||
task_configs = {}
|
||||
|
||||
# 启动 Dev_Fusion.py 的命令模板
|
||||
fusion_command_template = f"nohup python Dev_Fusion.py -t {{task_id}} -g {DEV_FUSION_G} -i {DEV_FUSION_I} > /dev/null 2> error.log &"
|
||||
|
||||
# 日志文件夹路径
|
||||
log_folder = "tasklog"
|
||||
os.makedirs(log_folder, exist_ok=True)
|
||||
|
||||
# 创建全局锁
|
||||
task_lock = threading.Lock()
|
||||
|
||||
|
||||
def compare_configs(old_config, new_config):
|
||||
"""
|
||||
比较两个配置是否有实质性差异
|
||||
返回 True 表示有差异,需要重启
|
||||
返回 False 表示无差异,只需转发
|
||||
"""
|
||||
try:
|
||||
# 1. 检查 devices 列表
|
||||
old_devices = old_config.get('devices', [])
|
||||
new_devices = new_config.get('devices', [])
|
||||
|
||||
if len(old_devices) != len(new_devices):
|
||||
return True
|
||||
|
||||
# 为每个设备创建一个关键信息元组进行比较
|
||||
def get_device_key(device):
|
||||
return (
|
||||
device.get('device_id'),
|
||||
device.get('device_topic'),
|
||||
device.get('device_type'),
|
||||
device.get('reference_point')
|
||||
|
||||
)
|
||||
|
||||
old_device_keys = {get_device_key(d) for d in old_devices}
|
||||
new_device_keys = {get_device_key(d) for d in new_devices}
|
||||
|
||||
# 如果设备的关键信息有变化,需要重启
|
||||
if old_device_keys != new_device_keys:
|
||||
return True
|
||||
|
||||
# 2. 检查参考点
|
||||
old_ref = old_config.get('reference_point')
|
||||
new_ref = new_config.get('reference_point')
|
||||
|
||||
if old_ref != new_ref:
|
||||
return True
|
||||
|
||||
# 3. 其他参数(如 sampling_rate)的变化不需要重启
|
||||
logging.info("No critical configuration changes detected")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error comparing configs: {str(e)}")
|
||||
return True # 出错时视为有差异,安全起见重启实例
|
||||
|
||||
|
||||
def stop_task(task_id):
|
||||
"""停止指定的任务实例"""
|
||||
try:
|
||||
if task_id in running_tasks:
|
||||
process = running_tasks[task_id]
|
||||
# 使用 pkill 命令终止对应的 Python 进程
|
||||
subprocess.run(f"pkill -f 'python.*Dev_Fusion.py.*-t {task_id}'", shell=True)
|
||||
process.wait(timeout=5) # 等待进程结束
|
||||
del running_tasks[task_id]
|
||||
del task_configs[task_id]
|
||||
logging.info(f"Task {task_id} stopped successfully")
|
||||
except Exception as e:
|
||||
logging.error(f"Error stopping task {task_id}: {str(e)}")
|
||||
|
||||
|
||||
# 多线程处理函数
|
||||
def handle_task(client, task_id, payload):
|
||||
try:
|
||||
with task_lock: # 使用锁保护共享资源
|
||||
data = json.loads(payload)
|
||||
sensor_topic = topics_config['sensor_topic'].replace("+", task_id)
|
||||
|
||||
# 记录配置更新
|
||||
log_file = os.path.join(log_folder, f"received_tasklog_{task_id}.txt")
|
||||
current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
|
||||
def log_config_update(action):
|
||||
with open(log_file, "a") as f:
|
||||
f.write(f"\n=== Configuration Update at {current_time} ===\n")
|
||||
f.write(f"Task ID: {task_id}\n")
|
||||
f.write(f"MQTT_TOPIC: {topics_config['mqtt_topic']}\n")
|
||||
f.write(f"Payload: {payload}\n")
|
||||
f.write(f"Action: {action}\n")
|
||||
f.write("=" * 50 + "\n")
|
||||
|
||||
# 检查任务是否已经在运行
|
||||
if task_id in running_tasks:
|
||||
# 检查是否有存储的配置
|
||||
if task_id in task_configs:
|
||||
# 比较新旧配置
|
||||
if compare_configs(task_configs[task_id], data):
|
||||
logging.info(f"Configuration changed for task {task_id}, restarting...")
|
||||
stop_task(task_id)
|
||||
log_config_update("Configuration changed, restarting instance")
|
||||
start_new_instance(client, task_id, payload, data)
|
||||
else:
|
||||
# 配置无变化,只转发消息
|
||||
logging.info(f"No configuration change for task {task_id}, forwarding message")
|
||||
log_config_update("Message forwarded (no critical changes)")
|
||||
client.publish(sensor_topic, payload)
|
||||
else:
|
||||
# 没有存储的配置,存储新配置并转发
|
||||
logging.info(f"No stored config for task {task_id}, storing first config")
|
||||
task_configs[task_id] = data
|
||||
log_config_update("First config stored and forwarded")
|
||||
client.publish(sensor_topic, payload)
|
||||
else:
|
||||
# 任务不存在,启动新实例
|
||||
log_config_update("New instance started")
|
||||
start_new_instance(client, task_id, payload, data)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error handling task {task_id}: {str(e)}")
|
||||
|
||||
|
||||
def start_new_instance(client, task_id, payload, config):
|
||||
"""启动新的 Dev_Fusion 实例"""
|
||||
try:
|
||||
# 启动 Dev_Fusion.py 实例
|
||||
fusion_command = fusion_command_template.format(task_id=task_id)
|
||||
process = subprocess.Popen(fusion_command, shell=True)
|
||||
running_tasks[task_id] = process
|
||||
task_configs[task_id] = config
|
||||
|
||||
logging.info(f"Dev_Fusion.py started successfully for Task ID {task_id}")
|
||||
|
||||
# 保存日志,使用追加模式
|
||||
log_file = os.path.join(log_folder, f"received_tasklog_{task_id}.txt")
|
||||
current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
with open(log_file, "a") as f: # 使用 "a" 模式追加内容
|
||||
f.write(f"\n=== Configuration Update at {current_time} ===\n")
|
||||
f.write(f"Task ID: {task_id}\n")
|
||||
f.write(f"MQTT_TOPIC: {topics_config['mqtt_topic']}\n")
|
||||
f.write(f"Payload: {payload}\n")
|
||||
# 记录是否触发了重启
|
||||
f.write("Action: New instance started\n")
|
||||
f.write("=" * 50 + "\n")
|
||||
|
||||
# 等待实例启动
|
||||
time.sleep(0.5)
|
||||
|
||||
# 发送配置
|
||||
sensor_topic = topics_config['sensor_topic'].replace("+", task_id)
|
||||
client.publish(sensor_topic, payload)
|
||||
logging.info(f"Configuration sent to {sensor_topic}")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error starting new instance for task {task_id}: {str(e)}")
|
||||
if task_id in running_tasks:
|
||||
del running_tasks[task_id]
|
||||
del task_configs[task_id]
|
||||
|
||||
|
||||
# MQTT 回调函数
|
||||
def on_connect(client, userdata, flags, rc):
|
||||
if rc == 0:
|
||||
logging.info("Connected to MQTT broker")
|
||||
client.subscribe(topics_config['mqtt_topic']) # 使用yaml中的topic
|
||||
else:
|
||||
logging.error(f"Connection failed with code {rc}: {DISCONNECT_REASONS.get(rc, 'Unknown error')}")
|
||||
|
||||
|
||||
def on_message(client, userdata, msg):
|
||||
try:
|
||||
payload = msg.payload.decode("utf-8")
|
||||
logging.info(f"Received message on topic {msg.topic}")
|
||||
|
||||
data = json.loads(payload)
|
||||
task_id = data.get("task_id")
|
||||
|
||||
if task_id:
|
||||
thread = threading.Thread(target=handle_task, args=(client, task_id, payload))
|
||||
thread.start()
|
||||
else:
|
||||
logging.warning("Received message without task_id")
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logging.error("Received message is not valid JSON")
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing message: {str(e)}")
|
||||
|
||||
|
||||
def check_running_instances():
|
||||
"""检查系统中已经运行的 Dev_Fusion 实例"""
|
||||
try:
|
||||
# 使用 ps 命令查找运行中的 Dev_Fusion.py 实例
|
||||
result = subprocess.run("ps aux | grep 'python.*Dev_Fusion.py' | grep -v grep",
|
||||
shell=True, capture_output=True, text=True)
|
||||
|
||||
found_instances = []
|
||||
for line in result.stdout.splitlines():
|
||||
# 从命令行参数中提取 task_id
|
||||
if '-t' in line:
|
||||
parts = line.split()
|
||||
for i, part in enumerate(parts):
|
||||
if part == '-t' and i + 1 < len(parts):
|
||||
task_id = parts[i + 1]
|
||||
pid = parts[1] # 进程 ID 通常在第二列
|
||||
found_instances.append((task_id, pid))
|
||||
|
||||
for task_id, pid in found_instances:
|
||||
logging.info(f"Found running instance for task {task_id}, pid: {pid}")
|
||||
|
||||
# 读取该任务的最新配置
|
||||
config = read_latest_config(task_id)
|
||||
if config:
|
||||
# 将已运行的实例添加到 running_tasks
|
||||
running_tasks[task_id] = subprocess.Popen(['echo', ''], stdout=subprocess.PIPE)
|
||||
running_tasks[task_id].pid = int(pid)
|
||||
task_configs[task_id] = config
|
||||
logging.info(
|
||||
f"Successfully loaded config for task {task_id} from tasklog/received_tasklog_{task_id}.txt")
|
||||
else:
|
||||
logging.warning(f"No valid config found for task {task_id}, stopping instance...")
|
||||
subprocess.run(f"pkill -f 'python.*Dev_Fusion.py.*-t {task_id}'", shell=True)
|
||||
logging.info(f"Stopped instance {task_id} due to missing config")
|
||||
|
||||
logging.info(f"Finished checking instances. Loaded {len(running_tasks)} tasks with valid configs")
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error checking running instances: {str(e)}")
|
||||
|
||||
|
||||
def read_latest_config(task_id):
|
||||
"""读取指定任务的最新配置"""
|
||||
try:
|
||||
log_file = os.path.join(log_folder, f"received_tasklog_{task_id}.txt")
|
||||
if not os.path.exists(log_file):
|
||||
logging.error(f"No log file found for task {task_id}")
|
||||
return None
|
||||
|
||||
with open(log_file, 'r') as f:
|
||||
content = f.read()
|
||||
|
||||
# 按配置更新块分割
|
||||
updates = content.split('=== Configuration Update at')
|
||||
if not updates:
|
||||
return None
|
||||
|
||||
# 获取最后一个更新块
|
||||
latest_update = updates[-1]
|
||||
|
||||
# 提取 Payload
|
||||
payload_start = latest_update.find('Payload: ') + len('Payload: ')
|
||||
payload_end = latest_update.find('\nAction:')
|
||||
if payload_end == -1: # 如果没有 Action 行
|
||||
payload_end = latest_update.find('\n===')
|
||||
|
||||
if payload_start > 0 and payload_end > payload_start:
|
||||
payload = latest_update[payload_start:payload_end].strip()
|
||||
return json.loads(payload)
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error reading latest config for task {task_id}: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def restart_all_instances():
|
||||
"""重启所有运行中的实例"""
|
||||
logging.info("Scheduled restart: Beginning restart of all instances")
|
||||
|
||||
# 复制当前运行的任务列表,因为我们会修改 running_tasks
|
||||
tasks_to_restart = list(running_tasks.keys())
|
||||
|
||||
for task_id in tasks_to_restart:
|
||||
try:
|
||||
# 读取最新配置
|
||||
config = read_latest_config(task_id)
|
||||
if not config:
|
||||
logging.error(f"Could not find latest config for task {task_id}, skipping restart")
|
||||
continue
|
||||
|
||||
# 停止当前实例
|
||||
logging.info(f"Stopping task {task_id} for scheduled restart")
|
||||
stop_task(task_id)
|
||||
|
||||
# 将配置转换为 JSON 字符串
|
||||
payload = json.dumps(config)
|
||||
|
||||
# 启动新实例
|
||||
logging.info(f"Starting new instance for task {task_id} with latest config")
|
||||
start_new_instance(mqtt_client, task_id, payload, config)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error restarting task {task_id}: {str(e)}")
|
||||
|
||||
|
||||
def setup_scheduled_restart(restart_time="03:00"):
|
||||
"""设置定时重启任务"""
|
||||
schedule.every().day.at(restart_time).do(restart_all_instances)
|
||||
|
||||
def run_schedule():
|
||||
while True:
|
||||
schedule.run_pending()
|
||||
time.sleep(30) # 每30秒检查一次
|
||||
|
||||
# 启动调度器线程
|
||||
scheduler_thread = threading.Thread(target=run_schedule, daemon=True)
|
||||
scheduler_thread.start()
|
||||
|
||||
|
||||
def main():
|
||||
global mqtt_client # 添加全局变量以在重启时使用
|
||||
|
||||
# 在启动时检查已运行的实例
|
||||
check_running_instances()
|
||||
|
||||
# 创建 MQTT 客户端
|
||||
mqtt_client = mqtt.Client()
|
||||
mqtt_client.on_connect = on_connect
|
||||
mqtt_client.on_message = on_message
|
||||
mqtt_client.username_pw_set(mqtt_config['username'], mqtt_config['password'])
|
||||
|
||||
# 设置定时重启(默认凌晨3点)
|
||||
setup_scheduled_restart()
|
||||
|
||||
while True:
|
||||
try:
|
||||
mqtt_client.connect(mqtt_config['broker'], mqtt_config['port'], 60)
|
||||
mqtt_client.loop_forever()
|
||||
except Exception as e:
|
||||
logging.error(f"MQTT connection error: {str(e)}")
|
||||
time.sleep(5)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user