BIZ-28: deploy monitoring dashboard + alert config
Co-authored-by: multica-agent <github@multica.ai>
This commit is contained in:
@@ -0,0 +1,179 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Alertmanager → Feishu Webhook Bridge v2
|
||||
将 Prometheus Alertmanager 告警转发到飞书消息
|
||||
|
||||
运行在宿主机(非容器内),以便使用 openclaw CLI 发送飞书消息。
|
||||
|
||||
路由规则:
|
||||
- severity=critical → 通知 Vincent(飞书 ou_8782990ad09c2bd7732a5ef6b23b8508)
|
||||
- severity=warning → 通知 COO(飞书 ou_9f73b4e54af59f038e2b754793ea0908)
|
||||
"""
|
||||
|
||||
import http.server
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
|
||||
# 飞书 Webhook URL(通过环境变量配置,可选)
|
||||
FEISHU_WEBHOOK_CRITICAL = os.environ.get("FEISHU_WEBHOOK_CRITICAL", "")
|
||||
FEISHU_WEBHOOK_WARNING = os.environ.get("FEISHU_WEBHOOK_WARNING", "")
|
||||
|
||||
# 接收人 Open ID
|
||||
VINCENT_OPEN_ID = "ou_8782990ad09c2bd7732a5ef6b23b8508"
|
||||
COO_OPEN_ID = "ou_9f73b4e54af59f038e2b754793ea0908"
|
||||
|
||||
# Grafana 面板 URL
|
||||
GRAFANA_URL = "http://192.168.1.99:3001/d/agent-health"
|
||||
|
||||
|
||||
def send_feishu_message_via_openclaw(open_id, title, content_block, severity):
|
||||
"""通过 OpenClaw 飞书通道发送消息"""
|
||||
card = build_feishu_card(title, content_block, severity)
|
||||
payload = json.dumps({
|
||||
"receive_id": open_id,
|
||||
"msg_type": "interactive",
|
||||
"content": json.dumps(card),
|
||||
})
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["openclaw", "message", "send",
|
||||
"--channel", "feishu",
|
||||
"--target", open_id,
|
||||
"--message", payload],
|
||||
capture_output=True, text=True, timeout=10
|
||||
)
|
||||
if result.returncode == 0:
|
||||
print(f"[bridge] Feishu sent to {open_id[:20]}...")
|
||||
else:
|
||||
print(f"[bridge] Feishu error: {result.stderr[:200]}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
print(f"[bridge] Feishu exception: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
def send_feishu_webhook(webhook_url, title, content_block, severity):
|
||||
"""通过飞书 Webhook URL 发送"""
|
||||
if not webhook_url:
|
||||
return
|
||||
|
||||
card = build_feishu_card(title, content_block, severity)
|
||||
payload = json.dumps({"msg_type": "interactive", "content": json.dumps(card)}).encode("utf-8")
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
webhook_url,
|
||||
data=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST"
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
print(f"[bridge] Webhook sent: {resp.status}")
|
||||
except Exception as e:
|
||||
print(f"[bridge] Webhook error: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
def build_feishu_card(title, content, severity):
|
||||
"""构建飞书消息卡片"""
|
||||
color_map = {
|
||||
"critical": "red",
|
||||
"warning": "yellow",
|
||||
"info": "blue",
|
||||
}
|
||||
color = color_map.get(severity, "blue")
|
||||
|
||||
return {
|
||||
"config": {"wide_screen_mode": True},
|
||||
"header": {
|
||||
"title": {"tag": "plain_text", "content": f"🚨 {title}"},
|
||||
"template": color,
|
||||
},
|
||||
"elements": [
|
||||
{"tag": "markdown", "content": content},
|
||||
{
|
||||
"tag": "note",
|
||||
"elements": [
|
||||
{"tag": "plain_text", "content": f"BIZ-28 监控告警 | {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def handle_alert(alert_data):
|
||||
"""处理告警并发通知"""
|
||||
alerts = alert_data.get("alerts", [])
|
||||
for alert in alerts:
|
||||
labels = alert.get("labels", {})
|
||||
annotations = alert.get("annotations", {})
|
||||
status = alert.get("status", "firing")
|
||||
severity = labels.get("severity", "warning")
|
||||
alertname = labels.get("alertname", "Unknown")
|
||||
summary = annotations.get("summary", alertname)
|
||||
description = annotations.get("description", "")
|
||||
|
||||
title = f"[{severity.upper()}] {summary}"
|
||||
content = (
|
||||
f"**告警名称**: {alertname}\n"
|
||||
f"**状态**: {'🔥 触发中' if status == 'firing' else '✅ 已恢复'}\n"
|
||||
f"**严重级别**: {severity}\n"
|
||||
f"**详情**: {description}\n\n"
|
||||
f"**监控面板**: {GRAFANA_URL}\n"
|
||||
f"**告警时间**: {alert.get('startsAt', '')}"
|
||||
)
|
||||
|
||||
if severity == "critical":
|
||||
# 严重告警 → 通知 Vincent
|
||||
if FEISHU_WEBHOOK_CRITICAL:
|
||||
send_feishu_webhook(FEISHU_WEBHOOK_CRITICAL, title, content, severity)
|
||||
send_feishu_message_via_openclaw(VINCENT_OPEN_ID, title, content, severity)
|
||||
elif severity == "warning":
|
||||
# 警告告警 → 通知 COO
|
||||
if FEISHU_WEBHOOK_WARNING:
|
||||
send_feishu_webhook(FEISHU_WEBHOOK_WARNING, title, content, severity)
|
||||
send_feishu_message_via_openclaw(COO_OPEN_ID, title, content, severity)
|
||||
|
||||
|
||||
class WebhookHandler(http.server.BaseHTTPRequestHandler):
|
||||
def do_POST(self):
|
||||
content_length = int(self.headers.get("Content-Length", 0))
|
||||
body = self.rfile.read(content_length)
|
||||
|
||||
try:
|
||||
alert_data = json.loads(body)
|
||||
handle_alert(alert_data)
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
response = json.dumps({"status": "ok"}).encode()
|
||||
self.send_header("Content-Length", len(response))
|
||||
self.end_headers()
|
||||
self.wfile.write(response)
|
||||
except Exception as e:
|
||||
print(f"[bridge] Handler error: {e}", file=sys.stderr)
|
||||
self.send_response(500)
|
||||
self.end_headers()
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == "/health":
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
response = json.dumps({"status": "ok"}).encode()
|
||||
self.send_header("Content-Length", len(response))
|
||||
self.end_headers()
|
||||
self.wfile.write(response)
|
||||
else:
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
port = int(os.environ.get("WEBHOOK_PORT", 9094))
|
||||
server = http.server.HTTPServer(("0.0.0.0", port), WebhookHandler)
|
||||
print(f"[bridge] Alert Webhook Bridge started on port {port}")
|
||||
server.serve_forever()
|
||||
Reference in New Issue
Block a user