貔貅云原生

貔貅云原生

AlertManager对接WebHook

153
0
0
2023-09-27
AlertManager对接WebHook

AlertManager 发送告警概要

将告警转发到企业微信需要配置两处:一是在 Prometheus 中添加把告警转发给 AlertManager 的配置;二是在 AlertManager 中添加把消息转发给自定义开发的钩子(WebHook)的配置。钩子接收到信息后进行处理,再转发给企业微信群里的机器人。

在 Prometheus 配置中添加 AlertManager

# cat prometheus.yml
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          - localhost:9093

在 AlertManager 中添加钩子的 HTTP 地址

# cat alertmanager.yml
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://127.0.0.1:5001/'

使用 stress 进行 cpu 压测,从而产生告警信息

# yum install -y stress
# stress -c 2

确认数据格式

上面配置的钩子地址是 127.0.0.1(本机),端口是 5001。先开发一个简单的服务监听该端口,接收 AlertManager 以 POST 方式发来的信息并打印出来,以便查看告警数据的格式

package main

import (
	"fmt"
	"github.com/gin-gonic/gin"
)

// main starts a gin HTTP server on :5001 whose only route, POST /, is handled
// by receiveHook.
func main() {
	route := gin.Default()
	route.POST("/", receiveHook)

	// Run blocks until the server fails to start or stops with an error.
	// Surface that error instead of returning silently with exit status 0.
	if err := route.Run(":5001"); err != nil {
		panic(fmt.Errorf("webhook server on :5001: %w", err))
	}
}

// receiveHook dumps the raw request body to stdout and echoes it back to the
// caller with status 200. If the body cannot be read it answers 400 with the
// error text instead.
func receiveHook(c *gin.Context) {
	raw, readErr := c.GetRawData()
	if readErr != nil {
		c.String(400, readErr.Error())
		return
	}

	// Convert once and reuse the same text for the log line and the reply.
	payload := string(raw)
	fmt.Println(payload)
	c.String(200, payload)
}

在前台运行该程序后,fmt.Println 会把收到的告警数据打印出来(格式如下);后续代码要做的就是对这份数据进行格式转换并转发

{
    "receiver":"web\\.hook",
    "status":"firing",
    "alerts":[
        {
            "status":"firing",
            "labels":{
                "alertname":"node-cpu-used-percent-high-5",
                "desc":"总CPU使用率",
                "instance":"localhost:9100",
                "unit":"%"
            },
            "annotations":{
                "description":"instance: localhost:9100 CPU 使用率高于5%, 时间已经5分钟了。",
                "notify":"Info",
                "summary":"instance: localhost:9100 CPU 使用率过高",
                "value":"100"
            },
            "startsAt":"2023-07-13T02:56:18.783Z",
            "endsAt":"0001-01-01T00:00:00Z",
            "generatorURL":"http://grafana:9090/graph?g0.expr=node_exporter%3Acpu%3Atotal%3Apercent+%3E+5\u0026g0.tab=1",
            "fingerprint":"1284c73ab2eeda96"
        }
    ],
    "groupLabels":{
        "alertname":"node-cpu-used-percent-high-5"
    },
    "commonLabels":{
        "alertname":"node-cpu-used-percent-high-5",
        "desc":"总CPU使用率",
        "instance":"localhost:9100",
        "unit":"%"
    },
    "commonAnnotations":{
        "description":"instance: localhost:9100 CPU 使用率高于5%, 时间已经5分钟了。",
        "notify":"Info",
        "summary":"instance: localhost:9100 CPU 使用率过高",
        "value":"100"
    },
    "externalURL":"http://grafana:9093",
    "version":"4",
    "groupKey":"{}:{alertname=\"node-cpu-used-percent-high-5\"}",
    "truncatedAlerts":0
}

对接企业微信群机器人

首先在企业微信群里创建一个机器人,获取到机器人的 webhook 后,打开企业微信开发者文档,查看发送给机器人的请求格式,这里选择 markdown 格式

2023-07-13_110158_1654980.8140918977665011.png

微信截图_20230927181809.png

完整代码

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"github.com/gin-gonic/gin"
	"io/ioutil"
	"net/http"
)

// qyapi is the WeCom (Enterprise WeChat) group-robot webhook URL that alert
// messages are POSTed to.
// NOTE(review): the "key" query parameter is a credential embedded in source;
// presumably a demo value — load it from configuration or the environment in
// production, and rotate it if it is a real key.
const qyapi = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=769d17c2-fc86-4273-b6b1-0389fb5ed57d"

// Payload types: what is decoded from the incoming webhook request and what
// is encoded for the group robot.
type (
	// alertManager holds the fields read from an incoming webhook
	// notification: the overall status and the list of alerts. Any other
	// fields in the payload are ignored when decoding.
	alertManager struct {
		Status string  `json:"status"`
		Alerts []alert `json:"alerts"`
	}

	// alert is a single entry of the notification's "alerts" array, with its
	// own status plus its label and annotation maps.
	alert struct {
		Status      string            `json:"status"`
		Labels      map[string]string `json:"labels"`
		Annotations map[string]string `json:"annotations"`
	}

	// message is the request body sent to the group robot. It encodes as
	// {"msgtype": ..., "markdown": {"content": ...}}.
	message struct {
		Msgtype  string `json:"msgtype"`
		MarkDown struct {
			Content string `json:"content"`
		} `json:"markdown"`
	}
)

// main starts a gin HTTP server on :5001 whose only route, POST /, is handled
// by receiveHook.
func main() {
	route := gin.Default()
	route.POST("/", receiveHook)

	// Run blocks until the server fails to start or stops with an error.
	// Surface that error instead of returning silently with exit status 0.
	if err := route.Run(":5001"); err != nil {
		panic(fmt.Errorf("webhook server on :5001: %w", err))
	}
}

// receiveHook handles a webhook notification: it decodes the JSON body into
// an alertManager value and hands it to transmitHook for forwarding.
//
// The HTTP status reflects the outcome so the sender can react to it:
//   - 400 when the body cannot be read or is not valid JSON,
//   - 502 when forwarding to the robot fails,
//   - 200 on success.
//
// The JSON reply keeps the {"code": ..., "msg": ...} shape, with "code"
// mirroring the HTTP status.
func receiveHook(c *gin.Context) {
	body, err := c.GetRawData()
	if err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"code": http.StatusBadRequest, "msg": err.Error()})
		return
	}

	var am alertManager
	if err := json.Unmarshal(body, &am); err != nil {
		c.JSON(http.StatusBadRequest, gin.H{"code": http.StatusBadRequest, "msg": err.Error()})
		return
	}

	if err := transmitHook(&am); err != nil {
		// Answering 200 here would make the sender believe the notification
		// was delivered; a 5xx lets Alertmanager retry it.
		c.JSON(http.StatusBadGateway, gin.H{"code": http.StatusBadGateway, "msg": err.Error()})
		return
	}

	c.JSON(http.StatusOK, gin.H{"code": http.StatusOK, "msg": "success"})
}

// transmitHook forwards every firing alert in a to the group robot at qyapi,
// one markdown message per alert.
//
// A notification whose overall status is not "firing" is ignored. Individual
// alerts that are not firing are skipped without affecting the others. The
// first send error is returned and the remaining alerts are not sent.
func transmitHook(a *alertManager) error {
	if a.Status != "firing" {
		fmt.Println("The alarm status is not firing")
		return nil
	}

	for _, v := range a.Alerts {
		if v.Status != "firing" {
			// Skip only this alert; returning here would silently drop every
			// firing alert that follows it in the same notification.
			fmt.Println("The alarm status is not firing")
			continue
		}

		if err := sendAlert(v); err != nil {
			return fmt.Errorf("sending alert %q: %w", v.Labels["alertname"], err)
		}
	}

	return nil
}

// sendAlert renders one alert as a markdown robot message, POSTs it to qyapi
// and prints the response body. It returns an error when the request fails,
// the HTTP status is not 200, or the robot reports a non-zero errcode.
//
// Keeping the request in its own function makes the deferred Body.Close run
// after each alert rather than piling up until the whole batch is done.
func sendAlert(v alert) error {
	var m message
	m.Msgtype = "markdown"
	m.MarkDown.Content = fmt.Sprintf("<font color=\"red\">告警内容:</font>%s\n<font color=\"red\">告警说明:</font>%s\n<font color=\"red\">告警值:</font>%s", v.Labels["alertname"], v.Annotations["description"], v.Annotations["value"])

	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	// Send the <font> tags literally instead of as \u003c / \u003e escapes.
	enc.SetEscapeHTML(false)
	if err := enc.Encode(m); err != nil {
		return err
	}

	// http.Post uses the shared default client, so connections are reused
	// across alerts instead of building a new client for each one.
	resp, err := http.Post(qyapi, "application/json", &buf)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	fmt.Println(string(body))

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected HTTP status %s", resp.Status)
	}

	// The robot API reports failures in the JSON body (errcode/errmsg), so a
	// 200 status alone does not mean the message was accepted.
	var result struct {
		ErrCode int    `json:"errcode"`
		ErrMsg  string `json:"errmsg"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return fmt.Errorf("decoding robot response: %w", err)
	}
	if result.ErrCode != 0 {
		return fmt.Errorf("robot rejected message: errcode=%d errmsg=%q", result.ErrCode, result.ErrMsg)
	}
	return nil
}

完整代码只是测试功能,编写生产代码还需要详细设计。