AlertManager 发送告警概要
将告警转发到企业微信需要配置两处:Prometheus 中要添加将告警转发给 AlertManager 的配置,AlertManager 中要添加将消息转发给自定义开发的钩子(webhook)的配置。钩子接收告警信息并处理后,再转发给企业微信群里的机器人。
在 Prometheus 配置中添加 AlertManager
# cat prometheus.yml
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093
在 AlertManager 中添加钩子的 HTTP 地址
# cat alertmanager.yml
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'
使用 stress 进行 cpu 压测,从而产生告警信息
# yum install -y stress
# stress -c 2
确认数据格式
此处钩子地址是本机(127.0.0.1),端口是 5001。先开发一个服务接收告警信息并打印出来,以便查看告警数据的格式
package main
import (
"fmt"
"github.com/gin-gonic/gin"
)
// main starts a gin HTTP server on :5001 whose only route, POST /, is handled
// by receiveHook.
func main() {
	route := gin.Default()
	route.POST("/", receiveHook)
	// Run blocks while serving and only returns when the server cannot start
	// or stops with an error (for example, the port is already in use).
	// Surface that instead of letting the process exit silently with status 0.
	if err := route.Run(":5001"); err != nil {
		panic(fmt.Errorf("starting webhook receiver on :5001: %w", err))
	}
}
// receiveHook reads the raw request body, prints it to stdout so the alert
// payload format can be inspected, and echoes it back with status 200.
// If the body cannot be read, it answers 400 with the error text.
func receiveHook(c *gin.Context) {
	payload, readErr := c.GetRawData()
	if readErr != nil {
		c.String(400, readErr.Error())
		return
	}

	text := string(payload)
	fmt.Println(text)
	c.String(200, text)
}
代码前台执行后,fmt.Println 会打印数据格式,之后的代码就是对数据格式进行转换并转发
{
"receiver":"web\\.hook",
"status":"firing",
"alerts":[
{
"status":"firing",
"labels":{
"alertname":"node-cpu-used-percent-high-5",
"desc":"总CPU使用率",
"instance":"localhost:9100",
"unit":"%"
},
"annotations":{
"description":"instance: localhost:9100 CPU 使用率高于5%, 时间已经5分钟了。",
"notify":"Info",
"summary":"instance: localhost:9100 CPU 使用率过高",
"value":"100"
},
"startsAt":"2023-07-13T02:56:18.783Z",
"endsAt":"0001-01-01T00:00:00Z",
"generatorURL":"http://grafana:9090/graph?g0.expr=node_exporter%3Acpu%3Atotal%3Apercent+%3E+5\u0026g0.tab=1",
"fingerprint":"1284c73ab2eeda96"
}
],
"groupLabels":{
"alertname":"node-cpu-used-percent-high-5"
},
"commonLabels":{
"alertname":"node-cpu-used-percent-high-5",
"desc":"总CPU使用率",
"instance":"localhost:9100",
"unit":"%"
},
"commonAnnotations":{
"description":"instance: localhost:9100 CPU 使用率高于5%, 时间已经5分钟了。",
"notify":"Info",
"summary":"instance: localhost:9100 CPU 使用率过高",
"value":"100"
},
"externalURL":"http://grafana:9093",
"version":"4",
"groupKey":"{}:{alertname=\"node-cpu-used-percent-high-5\"}",
"truncatedAlerts":0
}
对接企业微信群机器人
首先在企业微信群里创建一个机器人,获取到机器人的 webhook 后,打开企业微信开发者文档,查看发送给机器人的请求格式,这里选择 markdown 格式
完整代码
package main
import (
"bytes"
"encoding/json"
"fmt"
"github.com/gin-gonic/gin"
"io/ioutil"
"net/http"
)
// qyapi is the WeCom (WeChat Work) group-robot webhook URL that this program
// POSTs alert messages to.
// NOTE(review): the key query parameter is hard-coded here; presumably it is
// the robot's credential — consider loading it from configuration instead.
const qyapi = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=769d17c2-fc86-4273-b6b1-0389fb5ed57d"
type (
	// alertManager holds the fields decoded from an Alertmanager webhook
	// payload: the status of the notification group and its alerts.
	alertManager struct {
		Status string  `json:"status"`
		Alerts []alert `json:"alerts"`
	}

	// alert is a single entry of the payload's "alerts" array, carrying its
	// own status plus its label and annotation key/value pairs.
	alert struct {
		Status      string            `json:"status"`
		Labels      map[string]string `json:"labels"`
		Annotations map[string]string `json:"annotations"`
	}

	// message is the JSON body sent to the group robot: a message type and
	// a nested "markdown" object holding the text content.
	message struct {
		Msgtype  string `json:"msgtype"`
		MarkDown struct {
			Content string `json:"content"`
		} `json:"markdown"`
	}
)
// main starts a gin HTTP server on :5001 whose only route, POST /, is handled
// by receiveHook.
func main() {
	route := gin.Default()
	route.POST("/", receiveHook)
	// Run blocks while serving and only returns when the server cannot start
	// or stops with an error (for example, the port is already in use).
	// Surface that instead of letting the process exit silently with status 0.
	if err := route.Run(":5001"); err != nil {
		panic(fmt.Errorf("starting webhook receiver on :5001: %w", err))
	}
}
// receiveHook is the gin handler for the webhook POST. It decodes the request
// body into an alertManager value and hands it to transmitHook.
//
// The HTTP status now reflects the outcome instead of always being 200:
//   - 400 when the body cannot be read or is not valid JSON,
//   - 502 when forwarding to the robot fails,
//   - 200 on success.
//
// A 2xx answer tells the sender the notification was delivered, so reporting
// failures as 200 would hide them and prevent any retry. The JSON body keeps
// its {"code", "msg"} shape, with "code" mirroring the HTTP status.
func receiveHook(c *gin.Context) {
	body, err := c.GetRawData()
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"code": http.StatusBadRequest, "msg": err.Error()})
		return
	}

	var am alertManager
	if err = json.Unmarshal(body, &am); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"code": http.StatusBadRequest, "msg": err.Error()})
		return
	}

	// The payload was fine but the upstream robot could not be reached or
	// rejected the message: report a gateway error rather than a client error.
	if err = transmitHook(&am); err != nil {
		c.JSON(http.StatusBadGateway, gin.H{"code": http.StatusBadGateway, "msg": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"code": http.StatusOK, "msg": "success"})
}
// transmitHook forwards every firing alert in a to the group robot at qyapi
// as a markdown message.
//
// If the group status is not "firing" nothing is sent and nil is returned.
// Alerts inside a firing group whose own status is not "firing" are skipped
// individually; the remaining alerts are still processed. The first send
// failure aborts the loop and is returned, wrapped with the alert name.
func transmitHook(a *alertManager) error {
	if a.Status != "firing" {
		fmt.Println("The alarm status is not firing")
		return nil
	}

	// One client for the whole batch so connections can be reused.
	// NOTE(review): the client has no timeout; consider setting
	// http.Client.Timeout so a stalled upstream cannot block the handler.
	client := &http.Client{}

	for _, v := range a.Alerts {
		if v.Status != "firing" {
			// Skip only this alert: later entries may still be firing.
			fmt.Println("The alarm status is not firing")
			continue
		}
		if err := sendMarkdown(client, alertMarkdown(v)); err != nil {
			return fmt.Errorf("forwarding alert %q: %w", v.Labels["alertname"], err)
		}
	}
	return nil
}

// alertMarkdown renders one alert as the markdown text sent to the robot:
// alert name, description annotation and value annotation, one per line.
func alertMarkdown(v alert) string {
	return fmt.Sprintf(
		"<font color=\"red\">告警内容:</font>%s\n<font color=\"red\">告警说明:</font>%s\n<font color=\"red\">告警值:</font>%s",
		v.Labels["alertname"], v.Annotations["description"], v.Annotations["value"],
	)
}

// sendMarkdown POSTs content to qyapi as a markdown message and prints the
// response body. It lives in its own function so the deferred Body.Close runs
// after each request rather than piling up until the whole batch is done.
func sendMarkdown(client *http.Client, content string) error {
	m := message{Msgtype: "markdown"}
	m.MarkDown.Content = content

	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	// Keep '<', '>' and '&' literal: the content carries <font> tags that
	// must not be turned into \u003c-style escapes.
	enc.SetEscapeHTML(false)
	if err := enc.Encode(m); err != nil {
		return err
	}

	req, err := http.NewRequest(http.MethodPost, qyapi, &buf)
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("robot webhook returned %s: %s", resp.Status, body)
	}
	fmt.Println(string(body))
	return nil
}
完整代码只是测试功能,编写生产代码还需要详细设计。