Monitoring servers and notifying a Discord webhook


This time, I will use Prometheus to track how my servers are doing. I will write alert rules so that Prometheus checks that my servers are still exporting metric data for it to scrape.

Let's create a project

mkdir prometheus-monitor
cd prometheus-monitor

Then create a rule for prometheus

mkdir prometheus
touch prometheus/alert.rules.yml
# Alerting rules loaded by Prometheus (listed under rule_files in prometheus.yml).
groups:
- name: AllInstance
  rules:
  # Matches every scrape target whose `up` series is 0, i.e. whose scrape failed.
  - alert: InstanceDown
    expr: up == 0
    # The expression must stay true for this long before the alert fires.
    for: 10s
    labels:
      severity: critical
    # Annotations are passed along with the alert; the templates below are
    # filled in from the labels of the failing target.
    annotations:
      summary: "Server Unavailable"
      description: "Server -> {{ $labels.job }} - {{ $labels.instance }}"

The rule named InstanceDown will be triggered when prometheus can't fetch metric data from a server (condition: up == 0) and it will set the job name and the instance name in annotations' description.

Now, write the prometheus config files

touch prometheus/prometheus.yml
# Main Prometheus configuration.
global:
  # How often targets are scraped, and how long a single scrape may take.
  scrape_interval: 1m
  scrape_timeout: 5s
# Rule files are resolved relative to this config file.
rule_files:
  - alert.rules.yml
# Where firing alerts are sent. "alertmanager" is the service name used in
# docker-compose.yml.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ["alertmanager:9093"]
scrape_configs:
  - job_name: myjob
    metrics_path: /metrics
    scheme: http
    follow_redirects: true
    static_configs:
      # The two metric servers started with PORT=8081 and PORT=8082.
      # Replace the IP address with the address of your own machine.
      - targets:
          - "172.18.117.234:8081"
          - "172.18.117.234:8082"

Since I use Alertmanager to send the alert data, let's configure Alertmanager.

mkdir alertmanager
touch alertmanager/alertmanager.yml
# Alertmanager configuration: forward every alert to the webhook server.
global:
route:
  # NOTE(review): the Alertmanager docs spell the "group by all labels" value
  # as ['...'] (quoted); confirm the unquoted form is parsed the same way.
  # Grouping by all labels effectively disables aggregation, so each alert is
  # delivered on its own.
  group_by: [...]
  # How long to wait before re-sending a notification for a still-firing alert.
  repeat_interval: 3h
  receiver: webhook_issues
receivers:
  - name: webhook_issues
    webhook_configs:
    # The Go webhook server from this article (listens on :8080, POST /webhook).
    # Replace the IP address with the address of your own machine.
    - url: http://172.18.117.234:8080/webhook

In the two configs above, 172.18.117.234 is my machine's IP address; you will need to change it to the IP address of your own server.

I will run Prometheus and Alertmanager on Docker, so I will create a docker-compose file.

touch docker-compose.yml
# Runs Alertmanager and Prometheus side by side; each container gets its
# config directory from the matching folder in this project.
version: '3'
services:
  alertmanager:
    image: prom/alertmanager
    ports:
    - 9093:9093
    volumes:
    # Makes ./alertmanager/alertmanager.yml available at /etc/alertmanager,
    # presumably where the image looks for its config by default.
    - ./alertmanager:/etc/alertmanager
  prometheus:
    image: prom/prometheus
    ports:
    - 9090:9090
    volumes:
    # Makes prometheus.yml and alert.rules.yml available at /etc/prometheus.
    - ./prometheus:/etc/prometheus

That's it for the configs. Next, let's build a server to test

go mod init goprom
touch main.go
package main

import (
	"context"
	"fmt"
	"log"
	"math/rand"
	"os"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/exporters/prometheus"
	"go.opentelemetry.io/otel/metric/global"
	"go.opentelemetry.io/otel/metric/instrument"
	"go.opentelemetry.io/otel/sdk/metric/aggregator/histogram"
	controller "go.opentelemetry.io/otel/sdk/metric/controller/basic"
	"go.opentelemetry.io/otel/sdk/metric/export/aggregation"
	processor "go.opentelemetry.io/otel/sdk/metric/processor/basic"
	selector "go.opentelemetry.io/otel/sdk/metric/selector/simple"
)

var (
	// lemonsKey is an attribute key. Nothing in the code shown here references it.
	lemonsKey = attribute.Key("ex.com/lemons")
)

// initMeterServer configures the OpenTelemetry Prometheus exporter, installs
// its meter provider as the global provider, and serves the exported metrics
// at /metrics on the port named by the PORT environment variable.
//
// It panics if the exporter cannot be created. The HTTP server runs in a
// background goroutine; if it stops, the error is logged.
func initMeterServer() {
	config := prometheus.Config{
		DefaultHistogramBoundaries: []float64{0.1, 0.2, 0.5, 1, 2},
	}
	c := controller.New(
		processor.NewFactory(
			selector.NewWithHistogramDistribution(
				histogram.WithExplicitBoundaries(config.DefaultHistogramBoundaries),
			),
			aggregation.CumulativeTemporalitySelector(),
			processor.WithMemory(true),
		),
	)
	exporter, err := prometheus.New(config, c)
	if err != nil {
		log.Panicf("failed to initialize prometheus exporter %v", err)
	}

	global.SetMeterProvider(exporter.MeterProvider())

	// Build the address once so the log line below reports the port that is
	// actually used instead of a hard-coded one.
	addr := ":" + os.Getenv("PORT")

	r := gin.Default()
	r.GET("/metrics", gin.WrapH(exporter))
	go func() {
		// Run only returns when the server cannot start or stops serving
		// (for example, the port is already taken), so report it.
		if err := r.Run(addr); err != nil {
			log.Printf("metrics server on %s stopped: %v", addr, err)
		}
	}()

	fmt.Printf("Prometheus server running on %s\n", addr)
}

// main starts the metrics endpoint and then produces sample data forever:
// once per second it draws a random value in [0, 3), records it in the
// http_request_duration_seconds histogram, and stores it as the value that
// the http_request_sample gauge reports when it is collected.
func main() {
	initMeterServer()

	meter := global.Meter("")

	observerLock := new(sync.RWMutex)
	var value float64
	// Each attribute needs its own key; a repeated key would leave only one
	// of the two values on the exported series.
	commonAttrs := []attribute.KeyValue{attribute.String("A", "1"), attribute.String("B", "2")}

	gaugeObserver, err := meter.AsyncFloat64().Gauge("http_request_sample")
	if err != nil {
		log.Panicf("failed to initialize instrument: %v", err)
	}
	err = meter.RegisterCallback([]instrument.Asynchronous{gaugeObserver}, func(ctx context.Context) {
		// Copy the shared value under the read lock, then observe the copy.
		observerLock.RLock()
		current := value
		observerLock.RUnlock()
		gaugeObserver.Observe(ctx, current, commonAttrs...)
	})
	if err != nil {
		log.Panicf("failed to register callback: %v", err)
	}

	// Named durations rather than histogram so it does not shadow the
	// imported histogram package.
	durations, err := meter.SyncFloat64().Histogram("http_request_duration_seconds")
	if err != nil {
		log.Panicf("failed to initialize instrument: %v", err)
	}

	ctx := context.Background()

	for {
		sample := rand.Float64() * 3

		// Hold the write lock only while updating the value the gauge reads.
		observerLock.Lock()
		value = sample
		observerLock.Unlock()

		durations.Record(ctx, sample, commonAttrs...)
		time.Sleep(1 * time.Second)
	}
}

We will use this to export data for prometheus

Next, write a webhook server

mkdir webhook
touch webhook/main.go
package main

import (
	"github.com/gin-gonic/gin"
)

// main starts the HTTP server that receives Alertmanager notifications on
// POST /webhook and passes them to discordhook.
func main() {
	r := gin.Default()
	r.POST("/webhook", discordhook)
	// Run blocks while serving and only returns an error (for example, when
	// port 8080 is already in use), so do not exit silently.
	if err := r.Run(":8080"); err != nil {
		panic(err)
	}
}
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"os"
	"strings"

	"github.com/gin-gonic/gin"
)

// AlertManagerWebhook is the JSON body that Alertmanager sends to a webhook
// receiver. Field names follow the JSON keys in the tags.
//
// ref: https://prometheus.io/docs/alerting/latest/configuration/#webhook_config
type AlertManagerWebhook struct {
	Version           string            `json:"version"`
	GroupKey          string            `json:"groupKey"`
	TruncatedAlerts   int               `json:"truncatedAlerts"`
	Status            string            `json:"status"`
	Receiver          string            `json:"receiver"`
	GroupLabels       map[string]string `json:"groupLabels"`
	CommonLabels      map[string]string `json:"commonLabels"`
	CommonAnnotations map[string]string `json:"commonAnnotations"`
	ExternalURL       string            `json:"externalURL"`
	// Alerts holds the individual alerts carried by this notification.
	Alerts            []struct {
		Status       string            `json:"status"`
		// Label is decoded from the "labels" key (the Go name is singular).
		Label        map[string]string `json:"labels"`
		Annotations  map[string]string `json:"annotations"`
		// StartsAt and EndsAt are kept as the raw timestamp strings.
		StartsAt     string            `json:"startsAt"`
		EndsAt       string            `json:"endsAt"`
		GeneratorURL string            `json:"generatorURL"`
		Fingerprint  string            `json:"fingerprint"`
	} `json:"alerts"`
}

// Types used to build the JSON body that is posted to a Discord webhook.
//
// ref: https://discord.com/developers/docs/resources/webhook#execute-webhook
type (
	// DiscordEmbedField is one name/value entry inside an embed.
	DiscordEmbedField struct {
		Name   string `json:"name"`
		Value  string `json:"value"`
		Inline bool   `json:"inline"`
	}
	// DiscordEmbed is a single embed with a title, a description and
	// optional fields.
	DiscordEmbed struct {
		Title       string              `json:"title"`
		Description string              `json:"description"`
		Fields      []DiscordEmbedField `json:"fields"`
	}
	// DiscordHook is the top-level webhook payload: a text message plus
	// its embeds.
	DiscordHook struct {
		Content string         `json:"content"`
		Embeds  []DiscordEmbed `json:"embeds"`
	}
)

// discordhook handles an Alertmanager webhook notification: it decodes the
// request body, builds a Discord message from the notification status and the
// common "summary" and "description" annotations, and posts that message to
// the Discord webhook URL taken from the WEBHOOK_URL environment variable.
//
// Responses:
//   - 200 when Discord accepted the message,
//   - 400 when the request body is not valid JSON,
//   - 500 when the Discord payload cannot be encoded,
//   - 502 when the request to Discord fails or Discord answers with a
//     non-2xx status.
func discordhook(ctx *gin.Context) {
	info := &AlertManagerWebhook{}
	if err := json.NewDecoder(ctx.Request.Body).Decode(info); err != nil {
		// Do not forward an empty alert to Discord when the body is unusable.
		log.Printf("decoding alertmanager payload: %v", err)
		ctx.AbortWithStatus(http.StatusBadRequest)
		return
	}
	fmt.Printf("%+v\n", info)

	hook := DiscordHook{
		Content: "=== Alert ===",
		Embeds: []DiscordEmbed{
			{
				Title:       fmt.Sprintf("[%s] %s", strings.ToUpper(info.Status), info.CommonAnnotations["summary"]),
				Description: info.CommonAnnotations["description"],
			},
		},
	}

	var buf bytes.Buffer
	if err := json.NewEncoder(&buf).Encode(hook); err != nil {
		log.Printf("encoding discord payload: %v", err)
		ctx.AbortWithStatus(http.StatusInternalServerError)
		return
	}

	resp, err := http.Post(os.Getenv("WEBHOOK_URL"), "application/json", &buf)
	if err != nil {
		log.Println(err)
		ctx.AbortWithStatus(http.StatusBadGateway)
		return
	}
	// The body must be closed, otherwise the underlying connection leaks.
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		// A rejected message (bad URL, rate limit, ...) is a failed delivery.
		log.Printf("discord webhook returned %s", resp.Status)
		ctx.AbortWithStatus(http.StatusBadGateway)
		return
	}

	ctx.Status(http.StatusOK)
}

To run all of this

# start the webhook
WEBHOOK_URL=<your-webhook-url> go run ./webhook
# open two terminals and run these two commands separately for the metric data server
PORT=8081 go run .
PORT=8082 go run .

Now you can visit the metric data at localhost:8081/metrics and localhost:8082/metrics. Check the prometheus server at localhost:9090 to see how it is working.

Next, try stopping the metric servers (but not the webhook server). Prometheus will fail to collect metric data and the alert rule will be triggered. Once the condition has held for the rule's `for` duration, the alert is sent through Alertmanager to your webhook server, and you will see the webhook post a message in your Discord channel.

If you start the servers again, Prometheus will send the alert again, but the status will be resolved instead of firing, which means the problem is fixed.

sources