// Go AlertManager alerting: a real-time alert system
//
// 1. AlertManager overview
//
// AlertManager processes the alerts sent by Prometheus and supports advanced
// features such as grouping, inhibition, and silencing.
//
// 2. Alert rules
//
// The declarations in this section belong to package alert.

// Rule is one alerting rule: a named expression plus the labels and
// annotations attached to it.
type Rule struct {
	// Name identifies the rule.
	Name string `json:"name"`
	// Expr is the query expression evaluated for the rule (see the
	// High* constants for examples).
	Expr string `json:"expr"`
	// For is a duration string such as "5m".
	For string `json:"for"`
	// Labels are key/value pairs attached to the rule, e.g. "severity".
	Labels map[string]string `json:"labels"`
	// Annotations are descriptive key/value pairs, e.g. "summary".
	Annotations map[string]string `json:"annotations"`
}

// RuleGroup is a named collection of rules.
type RuleGroup struct {
	Name  string `json:"name"`
	Rules []Rule `json:"rules"`
}

// NewRule returns a rule with the given name, expression and "for" duration.
// Labels and Annotations start as empty, non-nil maps so the With* builders
// can be chained immediately.
func NewRule(name, expr, forDuration string) *Rule {
	return &Rule{
		Name:        name,
		Expr:        expr,
		For:         forDuration,
		Labels:      make(map[string]string),
		Annotations: make(map[string]string),
	}
}

// WithLabel sets label key to value and returns r so calls can be chained.
func (r *Rule) WithLabel(key, value string) *Rule {
	// A Rule built as a literal or decoded from JSON may carry a nil map;
	// writing to it would panic.
	if r.Labels == nil {
		r.Labels = make(map[string]string)
	}
	r.Labels[key] = value
	return r
}

// WithAnnotation sets annotation key to value and returns r so calls can be
// chained.
func (r *Rule) WithAnnotation(key, value string) *Rule {
	// Same nil-map guard as WithLabel.
	if r.Annotations == nil {
		r.Annotations = make(map[string]string)
	}
	r.Annotations[key] = value
	return r
}

// Expressions used by DefaultRules.
const (
	// HighCPUUsage compares the per-name average 5m CPU usage rate with 0.8.
	HighCPUUsage = `avg(rate(container_cpu_usage_seconds_total{name=~".+"}[5m])) by (name) > 0.8`

	// HighMemoryUsage compares per-name memory usage divided by the memory
	// limit with 0.8.
	HighMemoryUsage = `avg(container_memory_usage_bytes{name=~".+"}) by (name) / avg(container_spec_memory_limit_bytes{name=~".+"}) by (name) > 0.8`

	// HighErrorRate compares the 5m rate of requests whose status matches
	// 5.. with 0.05.
	HighErrorRate = `rate(http_requests_total{status=~"5.."}[5m]) > 0.05`

	// HighLatency compares the 0.95 quantile of request duration over 5m
	// with 1.
	HighLatency = `histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 1`
)

// DefaultRules returns the built-in rule set: CPU, memory, error-rate and
// latency rules, each with a "5m" duration, a severity label and a summary
// annotation. Every call returns freshly allocated rules.
func DefaultRules() []*Rule {
	return []*Rule{
		NewRule("HighCPUUsage", HighCPUUsage, "5m").
			WithLabel("severity", "warning").
			WithAnnotation("summary", "High CPU usage detected"),
		NewRule("HighMemoryUsage", HighMemoryUsage, "5m").
			WithLabel("severity", "warning").
			WithAnnotation("summary", "High memory usage detected"),
		NewRule("HighErrorRate", HighErrorRate, "5m").
			WithLabel("severity", "critical").
			WithAnnotation("summary", "High error rate detected"),
		NewRule("HighLatency", HighLatency, "5m").
			WithLabel("severity", "warning").
			WithAnnotation("summary", "High latency detected"),
	}
}

// 3.
// Webhook notifications
//
// The code in sections 3 and 4 uses the standard-library packages bytes,
// encoding/json, fmt, io, net, net/http, net/smtp, strconv, strings and time.

// AlertWebhook holds the parts of a webhook payload that this package acts
// on: the receiver name, the overall status and the list of alerts.
type AlertWebhook struct {
	receiver string
	status   string
	alerts   []Alert
}

// UnmarshalJSON decodes the "receiver", "status" and "alerts" members of a
// webhook payload into wh. It exists because the fields of AlertWebhook are
// unexported, which encoding/json cannot populate on its own.
func (wh *AlertWebhook) UnmarshalJSON(data []byte) error {
	var raw struct {
		Receiver string  `json:"receiver"`
		Status   string  `json:"status"`
		Alerts   []Alert `json:"alerts"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}
	wh.receiver = raw.Receiver
	wh.status = raw.Status
	wh.alerts = raw.Alerts
	return nil
}

// Alert is a single alert carried inside a webhook payload.
type Alert struct {
	Status      string            `json:"status"`
	Labels      map[string]string `json:"labels"`
	Annotations map[string]string `json:"annotations"`
	StartsAt    time.Time         `json:"startsAt"`
	EndsAt      time.Time         `json:"endsAt"`
}

// NewAlertWebhook decodes a JSON webhook payload. It returns the decoding
// error unchanged when payload is not valid JSON for an AlertWebhook.
func NewAlertWebhook(payload []byte) (*AlertWebhook, error) {
	var wh AlertWebhook
	if err := json.Unmarshal(payload, &wh); err != nil {
		return nil, err
	}
	return &wh, nil
}

// Handle dispatches on the webhook status: "firing" and "resolved" are
// handled; any other status is ignored and nil is returned.
func (wh *AlertWebhook) Handle() error {
	switch wh.status {
	case "firing":
		return wh.handleFiring()
	case "resolved":
		return wh.handleResolved()
	}
	return nil
}

// handleFiring prints one line per alert to standard output.
func (wh *AlertWebhook) handleFiring() error {
	for _, alert := range wh.alerts {
		fmt.Printf("Alert Firing: %s - %s\n", alert.Labels["alertname"], alert.Annotations["summary"])
	}
	return nil
}

// handleResolved prints one line per alert to standard output.
func (wh *AlertWebhook) handleResolved() error {
	for _, alert := range wh.alerts {
		fmt.Printf("Alert Resolved: %s - %s\n", alert.Labels["alertname"], alert.Annotations["summary"])
	}
	return nil
}

// 4. Alert handler

// AlertHandler fans decoded alerts out to its registered notifiers.
type AlertHandler struct {
	notifiers []Notifier
}

// Notifier delivers one alert to some notification channel.
type Notifier interface {
	Notify(alert *Alert) error
}

// EmailNotifier sends alerts by e-mail through an SMTP server.
type EmailNotifier struct {
	smtpHost string
	smtpPort int
	from     string
	to       []string
}

// Notify sends alert as an e-mail whose subject is "[severity] alertname"
// and whose body lists the alert name and summary.
func (n *EmailNotifier) Notify(alert *Alert) error {
	subject := fmt.Sprintf("[%s] %s", alert.Labels["severity"], alert.Labels["alertname"])
	body := fmt.Sprintf("Alert: %s\nSummary: %s\n", alert.Labels["alertname"], alert.Annotations["summary"])
	return sendEmail(n.smtpHost, n.smtpPort, n.from, n.to, subject, body)
}

// SlackNotifier posts alerts to a Slack incoming-webhook URL.
type SlackNotifier struct {
	webhookURL string
	channel    string
}

// Notify posts alert as a JSON message with "channel" and "text" members.
func (n *SlackNotifier) Notify(alert *Alert) error {
	payload := map[string]interface{}{
		"channel": n.channel,
		"text": fmt.Sprintf("[%s] %s: %s",
			alert.Labels["severity"], alert.Labels["alertname"], alert.Annotations["summary"]),
	}
	return postJSON(n.webhookURL, payload)
}

// headerSanitizer replaces CR and LF so a value taken from alert data cannot
// inject additional mail headers.
var headerSanitizer = strings.NewReplacer("\r", " ", "\n", " ")

// sendEmail sends a plain-text message through the SMTP server at host:port
// without authentication.
func sendEmail(host string, port int, from string, to []string, subject, body string) error {
	msg := "From: " + from + "\r\n" +
		"To: " + strings.Join(to, ", ") + "\r\n" +
		"Subject: " + headerSanitizer.Replace(subject) + "\r\n" +
		"\r\n" +
		body
	addr := net.JoinHostPort(host, strconv.Itoa(port))
	if err := smtp.SendMail(addr, nil, from, to, []byte(msg)); err != nil {
		return fmt.Errorf("sending alert e-mail: %w", err)
	}
	return nil
}

// webhookClient is shared by all postJSON calls; the timeout keeps a hung
// endpoint from blocking alert delivery indefinitely.
var webhookClient = &http.Client{Timeout: 10 * time.Second}

// postJSON marshals payload to JSON and POSTs it to url. A response status
// outside the 2xx range is reported as an error.
func postJSON(url string, payload interface{}) error {
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encoding webhook payload: %w", err)
	}
	resp, err := webhookClient.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("posting webhook payload: %w", err)
	}
	defer resp.Body.Close()
	// Best-effort drain so the connection can be reused; the status code
	// below is what decides success.
	_, _ = io.Copy(io.Discard, resp.Body)
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("webhook endpoint returned status %s", resp.Status)
	}
	return nil
}

// NewAlertHandler returns a handler with no notifiers registered.
func NewAlertHandler() *AlertHandler {
	return &AlertHandler{
		notifiers: make([]Notifier, 0),
	}
}

// AddNotifier registers notifier. A nil notifier is ignored because calling
// it later would panic.
func (h *AlertHandler) AddNotifier(notifier Notifier) {
	if notifier == nil {
		return
	}
	h.notifiers = append(h.notifiers, notifier)
}

// HandleWebhook decodes payload and passes every alert to every registered
// notifier, in registration order. It stops at, and returns, the first
// decoding or notification error.
func (h *AlertHandler) HandleWebhook(payload []byte) error {
	wh, err := NewAlertWebhook(payload)
	if err != nil {
		return err
	}
	for i := range wh.alerts {
		// Index into the slice so each notifier receives a pointer to the
		// alert itself rather than to a loop variable.
		alert := &wh.alerts[i]
		for _, notifier := range h.notifiers {
			if err := notifier.Notify(alert); err != nil {
				return err
			}
		}
	}
	return nil
}

// 5. Summary
//
// AlertManager provides powerful alerting capabilities; with well-designed
// alert rules and multi-channel notifications, efficient incident response
// can be achieved.