更新RMDC系统的模块SKILL
This commit is contained in:
127
1-AgentSkills/managing-observability/SKILL.md
Normal file
127
1-AgentSkills/managing-observability/SKILL.md
Normal file
@@ -0,0 +1,127 @@
|
||||
---
|
||||
name: managing-observability
|
||||
description: "Guides observability implementation including structured logging, metrics, tracing, and audit log alignment for RMDC system. Triggered when adding log statements, defining metrics, implementing traces, or ensuring audit compliance. Keywords: structured log, metrics, trace, audit, Prometheus, OpenTelemetry, rmdc-audit-log."
|
||||
allowed-tools:
|
||||
- Read
|
||||
- Glob
|
||||
- Grep
|
||||
- Bash
|
||||
argument-hint: "$ARGUMENTS: <aspect> [module] — aspect: logging|metrics|tracing|audit"
|
||||
---
|
||||
|
||||
# managing-observability
|
||||
|
||||
## 概述
|
||||
本 Skill 指导 RMDC 系统的可观测性实现,确保日志、指标、追踪与审计的一致性。
|
||||
|
||||
## 动态上下文注入
|
||||
|
||||
### 查找日志调用
|
||||
!`grep -rn "log\.\(Info\|Error\|Warn\|Debug\)" --include="*.go" | head -20`
|
||||
|
||||
### 查找审计相关代码
|
||||
!`grep -rn "audit\|Audit\|AuditLog" --include="*.go" | head -20`
|
||||
|
||||
---
|
||||
|
||||
## Plan(规划阶段)
|
||||
|
||||
### 可观测性维度
|
||||
| 维度 | 工具 | 对齐模块 |
|
||||
|:---|:---|:---|
|
||||
| 日志 | 结构化日志 | rmdc-audit-log |
|
||||
| 指标 | Prometheus | - |
|
||||
| 追踪 | OpenTelemetry | - |
|
||||
| 审计 | PostgreSQL | rmdc-audit-log |
|
||||
|
||||
### 决策点
|
||||
- [ ] 日志级别是否合适?
|
||||
- [ ] 是否需要添加审计记录?
|
||||
- [ ] 指标命名是否符合规范?
|
||||
- [ ] trace_id 是否正确传递?
|
||||
|
||||
---
|
||||
|
||||
## Verify(验证清单)
|
||||
|
||||
### 日志规范检查
|
||||
- [ ] 使用结构化日志格式
|
||||
- [ ] 包含 request_id / trace_id
|
||||
- [ ] 敏感信息已脱敏
|
||||
- [ ] 日志级别正确
|
||||
|
||||
### 审计对齐检查
|
||||
- [ ] 关键操作有审计记录
|
||||
- [ ] 审计字段完整(who/when/what/where)
|
||||
- [ ] 审计记录不可篡改
|
||||
- [ ] 与 rmdc-audit-log 格式一致
|
||||
|
||||
### 验证命令
|
||||
```bash
|
||||
# 检查日志调用规范
|
||||
grep -rn "log\." --include="*.go" | grep -v "WithFields" | head -20
|
||||
|
||||
# 检查审计记录
|
||||
grep -rn "AuditLog\|audit" --include="*.go" | head -20
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execute(执行步骤)
|
||||
|
||||
### 添加结构化日志
|
||||
```go
|
||||
import log "github.com/sirupsen/logrus"
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"user_id": userID,
|
||||
"action": "login",
|
||||
"request_id": requestID,
|
||||
}).Info("用户登录成功")
|
||||
```
|
||||
|
||||
### 添加审计记录
|
||||
```go
|
||||
auditLog.Record(AuditEntry{
|
||||
UserID: userID,
|
||||
Action: "UPDATE_USER",
|
||||
ResourceID: targetUserID,
|
||||
Details: changes,
|
||||
Timestamp: time.Now(),
|
||||
IP: clientIP,
|
||||
})
|
||||
```
|
||||
|
||||
### 添加 Prometheus 指标
|
||||
```go
|
||||
var loginCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "rmdc_user_auth_login_total",
|
||||
Help: "Total number of login attempts",
|
||||
},
|
||||
[]string{"status"},
|
||||
)
|
||||
|
||||
// 使用
|
||||
loginCounter.WithLabelValues("success").Inc()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pitfalls(常见坑)
|
||||
|
||||
1. **日志泄露敏感信息**:密码、Token、身份证号等未脱敏直接打印。
|
||||
2. **审计字段缺失**:无法追溯操作人(user_id)或操作内容(details)。
|
||||
3. **日志级别滥用**:DEBUG 日志在生产环境大量输出影响性能。
|
||||
4. **审计记录可被删除**:审计表需要设置写保护,禁止 DELETE/UPDATE。
|
||||
5. **trace_id 未传递**:跨服务调用时未将 trace_id 传递到下游,无法串联请求链路。
|
||||
6. **指标命名不规范**:未遵循 `rmdc_模块_资源_动作_单位` 格式(详见 reference/metrics-naming.md)。
|
||||
|
||||
---
|
||||
|
||||
## 相关文件
|
||||
| 用途 | 路径 |
|
||||
|:---|:---|
|
||||
| 日志格式 | [reference/log-format.md](reference/log-format.md) |
|
||||
| 指标命名 | [reference/metrics-naming.md](reference/metrics-naming.md) |
|
||||
| 审计对齐 | [reference/audit-alignment.md](reference/audit-alignment.md) |
|
||||
@@ -0,0 +1,85 @@
|
||||
package logging
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// 结构化日志示例
|
||||
|
||||
// LogUserAction 记录用户操作日志
|
||||
func LogUserAction(userID uint, action string, requestID string, details map[string]interface{}) {
|
||||
fields := log.Fields{
|
||||
"user_id": userID,
|
||||
"action": action,
|
||||
"request_id": requestID,
|
||||
"timestamp": time.Now().Format(time.RFC3339),
|
||||
}
|
||||
|
||||
// 合并详情字段
|
||||
for k, v := range details {
|
||||
fields[k] = v
|
||||
}
|
||||
|
||||
log.WithFields(fields).Info("用户操作")
|
||||
}
|
||||
|
||||
// LogAPIRequest 记录 API 请求日志
|
||||
func LogAPIRequest(requestID string, method string, path string, statusCode int, duration time.Duration, userID uint) {
|
||||
log.WithFields(log.Fields{
|
||||
"request_id": requestID,
|
||||
"method": method,
|
||||
"path": path,
|
||||
"status_code": statusCode,
|
||||
"duration_ms": duration.Milliseconds(),
|
||||
"user_id": userID,
|
||||
}).Info("API请求")
|
||||
}
|
||||
|
||||
// LogError 记录错误日志
|
||||
func LogError(requestID string, err error, context map[string]interface{}) {
|
||||
fields := log.Fields{
|
||||
"request_id": requestID,
|
||||
"error": err.Error(),
|
||||
}
|
||||
|
||||
for k, v := range context {
|
||||
fields[k] = v
|
||||
}
|
||||
|
||||
log.WithFields(fields).Error("发生错误")
|
||||
}
|
||||
|
||||
// 敏感信息脱敏工具
|
||||
|
||||
// MaskPhone masks a phone number for logging.
//
// Inputs of at least 11 bytes keep their first 3 and last 4 bytes, with
// "****" in between. For an 11-digit number this is the same result as
// before; for longer inputs (for example with a country-code prefix) only
// the last four bytes stay visible instead of everything after byte 7.
// Shorter inputs are replaced entirely by "****".
func MaskPhone(phone string) string {
	const (
		minLen  = 11 // shortest input that is partially revealed
		headLen = 3  // leading bytes kept
		tailLen = 4  // trailing bytes kept
	)

	if len(phone) < minLen {
		return "****"
	}
	return phone[:headLen] + "****" + phone[len(phone)-tailLen:]
}
|
||||
|
||||
// MaskEmail masks the local part of an e-mail address for logging.
//
// When the first '@' sits at byte offset 3 or later, the first two bytes of
// the address are kept, followed by "***" and the unchanged remainder from
// '@' onward. Every other input (no '@', or '@' within the first three
// bytes) yields the fixed placeholder "***@***".
func MaskEmail(email string) string {
	const placeholder = "***@***"

	// '@' is ASCII, so a byte scan finds the same offset as a rune scan.
	at := -1
	for i := 0; i < len(email); i++ {
		if email[i] == '@' {
			at = i
			break
		}
	}

	if at <= 2 {
		return placeholder
	}
	return email[:2] + "***" + email[at:]
}
|
||||
|
||||
// MaskIDCard masks an ID card number for logging.
//
// Inputs of at least 18 bytes keep their first 6 bytes and everything from
// byte offset 14 onward, with eight asterisks in between. Shorter inputs are
// replaced entirely by "******".
func MaskIDCard(idCard string) string {
	if len(idCard) < 18 {
		return "******"
	}

	prefix, suffix := idCard[:6], idCard[14:]
	return prefix + "********" + suffix
}
|
||||
@@ -0,0 +1,74 @@
|
||||
# 审计日志对齐规范
|
||||
|
||||
## 与 rmdc-audit-log 对齐
|
||||
|
||||
所有模块的审计记录必须与 `rmdc-audit-log` 模块的格式保持一致。
|
||||
|
||||
## 必须字段
|
||||
|
||||
| 字段 | 类型 | 说明 |
|
||||
|:---|:---|:---|
|
||||
| id | uint | 审计记录ID |
|
||||
| user_id | uint | 操作人ID |
|
||||
| username | string | 操作人用户名 |
|
||||
| action | string | 操作类型 |
|
||||
| resource_type | string | 资源类型 |
|
||||
| resource_id | string | 资源ID |
|
||||
| details | json | 操作详情 |
|
||||
| ip_address | string | 客户端IP |
|
||||
| user_agent | string | 客户端UA |
|
||||
| timestamp | timestamp | 操作时间 |
|
||||
| result | string | 操作结果 success/failed |
|
||||
|
||||
## 操作类型规范
|
||||
|
||||
| 模块 | 操作类型 |
|
||||
|:---|:---|
|
||||
| user-auth | USER_LOGIN, USER_LOGOUT, USER_CREATE, USER_UPDATE, USER_DELETE, PASSWORD_CHANGE, PERMISSION_GRANT |
|
||||
| jenkins-dac | BUILD_TRIGGER, BUILD_CANCEL, PERMISSION_CHANGE |
|
||||
| exchange-hub | COMMAND_SEND, COMMAND_COMPLETE |
|
||||
| watchdog | DEPLOYMENT_START, DEPLOYMENT_COMPLETE, TOTP_VERIFY |
|
||||
| project-mgmt | PROJECT_CREATE, PROJECT_UPDATE, AUTH_GRANT |
|
||||
| work-procedure | WORKFLOW_CREATE, WORKFLOW_APPROVE, WORKFLOW_REJECT |
|
||||
|
||||
## 审计表保护
|
||||
|
||||
审计表必须设置以下保护:
|
||||
1. 禁止 DELETE 操作
|
||||
2. 禁止 UPDATE 操作(除标记字段外;注意下方示例触发器会拒绝所有 UPDATE,如需允许更新标记字段,须在触发器函数中显式放行)
|
||||
3. 定期备份
|
||||
4. 独立存储(建议)
|
||||
|
||||
```sql
|
||||
-- 创建只允许 INSERT 的触发器
|
||||
CREATE OR REPLACE FUNCTION prevent_audit_modify()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
RAISE EXCEPTION 'Audit log modification is not allowed';
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE TRIGGER audit_log_protect
|
||||
BEFORE UPDATE OR DELETE ON audit_logs
|
||||
FOR EACH ROW EXECUTE FUNCTION prevent_audit_modify();
|
||||
```
|
||||
|
||||
## 审计记录示例
|
||||
|
||||
```go
|
||||
// user-auth 模块
|
||||
audit.Record(audit.Entry{
|
||||
UserID: operatorID,
|
||||
Username: operatorName,
|
||||
Action: "USER_CREATE",
|
||||
ResourceType: "user",
|
||||
ResourceID: strconv.Itoa(int(newUser.ID)),
|
||||
Details: map[string]interface{}{
|
||||
"username": newUser.Username,
|
||||
"role": newUser.Role,
|
||||
},
|
||||
IPAddress: c.ClientIP(),
|
||||
UserAgent: c.Request.UserAgent(),
|
||||
Result: "success",
|
||||
})
|
||||
```
|
||||
58
1-AgentSkills/managing-observability/reference/log-format.md
Normal file
58
1-AgentSkills/managing-observability/reference/log-format.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# 日志格式规范
|
||||
|
||||
## 结构化日志
|
||||
|
||||
所有日志必须使用结构化格式,禁止字符串拼接。
|
||||
|
||||
### 正确示例
|
||||
```go
|
||||
log.WithFields(log.Fields{
|
||||
"user_id": userID,
|
||||
"action": "login",
|
||||
"request_id": requestID,
|
||||
"duration": duration.Milliseconds(),
|
||||
}).Info("用户登录成功")
|
||||
```
|
||||
|
||||
### 错误示例
|
||||
```go
|
||||
// ❌ 禁止
|
||||
log.Info("用户 " + username + " 登录成功,耗时 " + duration.String())
|
||||
```
|
||||
|
||||
## 必须字段
|
||||
|
||||
| 字段 | 说明 | 示例 |
|
||||
|:---|:---|:---|
|
||||
| request_id | 请求唯一标识 | uuid |
|
||||
| user_id | 操作用户ID | 123 |
|
||||
| action | 操作类型 | login, create_user |
|
||||
| duration | 耗时(毫秒) | 150 |
|
||||
|
||||
## 日志级别
|
||||
|
||||
| 级别 | 使用场景 |
|
||||
|:---|:---|
|
||||
| ERROR | 错误,需要关注和处理 |
|
||||
| WARN | 警告,可能的问题 |
|
||||
| INFO | 重要业务事件 |
|
||||
| DEBUG | 调试信息,生产环境关闭 |
|
||||
|
||||
## 敏感信息脱敏
|
||||
|
||||
必须脱敏的字段:
|
||||
- 密码(任何形式)
|
||||
- Token / Secret
|
||||
- 身份证号
|
||||
- 银行卡号
|
||||
- 手机号(中间四位)
|
||||
|
||||
```go
|
||||
// 脱敏工具
|
||||
func maskPhone(phone string) string {
|
||||
if len(phone) >= 11 {
|
||||
return phone[:3] + "****" + phone[7:]
|
||||
}
|
||||
return "****"
|
||||
}
|
||||
```
|
||||
@@ -0,0 +1,52 @@
|
||||
# 指标命名规范
|
||||
|
||||
## 命名格式
|
||||
|
||||
```
|
||||
rmdc_{module}_{resource}_{action}_{unit}
|
||||
```
|
||||
|
||||
## 命名规则
|
||||
|
||||
1. 全小写,下划线分隔
|
||||
2. 以 `rmdc_` 前缀开头
|
||||
3. 包含模块名
|
||||
4. 描述清晰的资源和动作
|
||||
5. 带单位后缀(如适用)
|
||||
|
||||
## 常用后缀
|
||||
|
||||
| 后缀 | 说明 | 示例 |
|
||||
|:---|:---|:---|
|
||||
| _total | 计数器 | rmdc_user_auth_login_total |
|
||||
| _seconds | 时间(秒) | rmdc_api_request_duration_seconds |
|
||||
| _bytes | 大小(字节) | rmdc_file_size_bytes |
|
||||
| _ratio | 比率 | rmdc_cache_hit_ratio |
|
||||
|
||||
## 示例
|
||||
|
||||
```text
|
||||
// 计数器
|
||||
rmdc_user_auth_login_total{status="success"}
|
||||
rmdc_user_auth_login_total{status="failed"}
|
||||
|
||||
// 直方图
|
||||
rmdc_user_auth_request_duration_seconds{endpoint="/api/auth/login"}
|
||||
|
||||
// Gauge
|
||||
rmdc_user_auth_active_sessions
|
||||
```
|
||||
|
||||
## 标签规范
|
||||
|
||||
- 标签名小写下划线
|
||||
- 标签值使用小写
|
||||
- 避免高基数标签(如 user_id)
|
||||
|
||||
```go
|
||||
// ✅ 正确
|
||||
loginCounter.WithLabelValues("success").Inc()
|
||||
|
||||
// ❌ 错误 - 高基数
|
||||
loginCounter.WithLabelValues(userID).Inc()
|
||||
```
|
||||
@@ -0,0 +1,128 @@
|
||||
#!/bin/bash
|
||||
# verify-observability.sh - 验证可观测性规范
|
||||
# 依赖: grep
|
||||
# 用法: ./verify-observability.sh [check-type]
|
||||
# check-type: all|logging|audit|metrics (默认 all)
|
||||
|
||||
# Abort on the first command that exits non-zero.
set -o errexit

# ANSI colour sequences used by the pass/fail/warn helpers.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# First CLI argument selects the check; default is "all".
CHECK_TYPE="${1:-all}"

# The tree to scan is resolved three levels above the script's directory.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
PROJECT_ROOT="${SCRIPT_DIR}/../../.."

echo "=== RMDC 可观测性验证 ==="
echo "检查类型: ${CHECK_TYPE}"
echo ""
|
||||
|
||||
# Print a green "[PASS]" tag followed by the message in $1.
pass() {
    echo -e "${GREEN}[PASS]${NC} ${1}"
}
|
||||
# Print a red "[FAIL]" tag followed by the message in $1.
fail() {
    echo -e "${RED}[FAIL]${NC} ${1}"
}
|
||||
# Print a yellow "[WARN]" tag followed by the message in $1.
warn() {
    echo -e "${YELLOW}[WARN]${NC} ${1}"
}
|
||||
|
||||
# 1. Logging convention checks.
#
# Scans *.go files under PROJECT_ROOT and reports (as warnings only):
#   - log.Info/Error/Warn calls (optionally with an "f" suffix) on lines that
#     do not mention WithFields, excluding *_test.go files (first 10 hits);
#   - lines mentioning password/token/secret that also contain "log." and no
#     masking marker (first 5 hits).
# Sets the globals UNSTRUCTURED and SENSITIVE.
check_logging() {
    echo "--- 日志规范检查 ---"

    # Direct log.* calls that bypass WithFields.
    UNSTRUCTURED=$(grep -rn 'log\.\(Info\|Error\|Warn\)f\?' --include="*.go" "${PROJECT_ROOT}" 2>/dev/null | \
        grep -v "WithFields" | grep -v "_test.go" | head -10)

    if [ -n "$UNSTRUCTURED" ]; then
        warn "发现非结构化日志调用:"
        echo "$UNSTRUCTURED"
    else
        pass "日志调用规范"
    fi

    # Possible sensitive-data leaks. The exclusion list uses fixed strings
    # (-F): as a regex, the alternative "****" matches the empty string, which
    # made "grep -v" discard every line and the check pass unconditionally.
    SENSITIVE=$(grep -rn 'password\|token\|secret' --include="*.go" "${PROJECT_ROOT}" 2>/dev/null | \
        grep -i 'log\.' | grep -vF -e 'Mask' -e 'mask' -e '****' | head -5)

    if [ -n "$SENSITIVE" ]; then
        warn "可能泄露敏感信息的日志:"
        echo "$SENSITIVE"
    else
        pass "未发现敏感信息泄露"
    fi
}
|
||||
|
||||
# 2. Audit convention checks.
#
# Counts non-test *.go lines under PROJECT_ROOT that mention audit/Audit, then
# for each key operation (login/create/delete/update) looks for at least one
# line that mentions both the operation and "audit" (case-insensitive).
# Results are reported via pass/warn only. Sets the globals AUDIT_CALLS and
# ACTION_AUDIT.
check_audit() {
    echo "--- 审计规范检查 ---"

    # Any audit-related line at all?
    AUDIT_CALLS=$(
        grep -rn "audit\|Audit" --include="*.go" "${PROJECT_ROOT}" 2>/dev/null |
            grep -v "_test.go" |
            wc -l
    )

    if [ "$AUDIT_CALLS" -eq 0 ]; then
        warn "未找到审计记录调用"
    else
        pass "存在审计记录调用 ($AUDIT_CALLS 处)"
    fi

    # Each pattern is a grep BRE alternation covering both spellings.
    for action in "Login\|login" "Create\|create" "Delete\|delete" "Update\|update"; do
        ACTION_AUDIT=$(
            grep -rn "$action" --include="*.go" "${PROJECT_ROOT}" 2>/dev/null |
                grep -i "audit" |
                head -1
        )

        if [ -z "$ACTION_AUDIT" ]; then
            warn "操作 $action 可能缺少审计"
        else
            pass "操作 $action 有审计"
        fi
    done
}
|
||||
|
||||
# 3. Metrics convention checks.
#
# Counts non-test *.go lines under PROJECT_ROOT that mention Prometheus, then
# lists every metric name found near a prometheus.New* constructor and warns
# about names that do not start with "rmdc_".
# Sets the globals PROM_USAGE and METRICS.
check_metrics() {
    echo "--- 指标规范检查 ---"

    # Is Prometheus referenced at all?
    PROM_USAGE=$(grep -rn "prometheus\|Prometheus" --include="*.go" "${PROJECT_ROOT}" 2>/dev/null | \
        grep -v "_test.go" | wc -l)

    if [ "$PROM_USAGE" -gt 0 ]; then
        pass "使用 Prometheus 指标 ($PROM_USAGE 处)"
    else
        warn "未找到 Prometheus 指标使用"
    fi

    # Metric names. In multi-line declarations the Name: field sits on a later
    # line than the prometheus.New* call, so a few lines of trailing context
    # (-A 5) are included before extracting the quoted name; filtering on the
    # constructor line alone found nothing for such declarations.
    METRICS=$(grep -rh -A 5 'prometheus\.New' --include="*.go" "${PROJECT_ROOT}" 2>/dev/null | \
        grep -oE 'Name:[[:space:]]*"[^"]+' | grep -oE '"[^"]+' | tr -d '"')

    if [ -n "$METRICS" ]; then
        echo "发现的指标:"
        # -r keeps backslashes literal; IFS= keeps surrounding whitespace.
        echo "$METRICS" | while IFS= read -r metric; do
            if echo "$metric" | grep -qE "^rmdc_"; then
                pass " $metric"
            else
                warn " $metric (建议以 rmdc_ 开头)"
            fi
        done
    fi
}
|
||||
|
||||
# Run the check(s) selected by CHECK_TYPE; an unknown value prints the
# accepted choices and exits with status 1.
if [ "$CHECK_TYPE" = "logging" ]; then
    check_logging
elif [ "$CHECK_TYPE" = "audit" ]; then
    check_audit
elif [ "$CHECK_TYPE" = "metrics" ]; then
    check_metrics
elif [ "$CHECK_TYPE" = "all" ]; then
    check_logging
    echo ""
    check_audit
    echo ""
    check_metrics
    echo ""
    echo "=== 所有检查完成 ==="
else
    echo "未知检查类型: $CHECK_TYPE"
    echo "可选: all|logging|audit|metrics"
    exit 1
fi
|
||||
Reference in New Issue
Block a user