feat: prometheus alerting

This commit is contained in:
Rob Watson 2023-07-18 09:32:46 +02:00
parent 22791dbf55
commit f2227fc61d
7 changed files with 54 additions and 10 deletions

View File

@ -22,6 +22,17 @@ configMapGenerator:
behavior: merge behavior: merge
files: files:
- prometheus.yml=prometheus.yaml - prometheus.yml=prometheus.yaml
- alerting_rules.yml=prometheus-alerting-rules.yaml
options:
labels:
app: prometheus
- name: prometheus-alertmanager
behavior: merge
files:
- alertmanager.yml=prometheus-alertmanager.yaml
options:
labels:
app: prometheus
- name: grafana - name: grafana
behavior: merge behavior: merge
files: files:

View File

@ -0,0 +1 @@
# Empty YAML mapping: this document defines no keys.
# NOTE(review): presumably a placeholder that another file is expected to
# replace or extend — confirm against the generator that consumes it.
{}

View File

@ -0,0 +1,17 @@
# Alertmanager configuration: one e-mail receiver, used as the default route
# for every alert.
global: {}

receivers:
  - name: mailgun-receiver
    email_configs:
      - to: alerts@netflux.io
        from: alerts@netflux.io
        # Quoted because the value carries a host:port pair.
        smarthost: 'smtp.eu.mailgun.org:587'
        # NOTE(review): these look like placeholder credentials. Confirm that
        # real SMTP credentials are supplied from outside version control and
        # never committed in this file.
        auth_username: test@example.com
        auth_password: foobar
        # Also notify when an alert stops firing.
        send_resolved: true

route:
  # Wait before sending a notification about new alerts added to a group
  # that has already been notified.
  group_interval: 5m
  # Initial wait before the first notification for a new group is sent.
  group_wait: 10s
  receiver: mailgun-receiver
  # Wait before re-sending a notification that was already delivered.
  repeat_interval: 3h

templates:
  # Quoted so the glob character is never mistaken for YAML syntax.
  - '/etc/alertmanager/*.tmpl'

View File

@ -296,15 +296,12 @@ alerting:
- source_labels: [__meta_kubernetes_namespace] - source_labels: [__meta_kubernetes_namespace]
regex: default regex: default
action: keep action: keep
- source_labels: [__meta_kubernetes_pod_label_app] - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
regex: prometheus regex: prometheus
action: keep action: keep
- source_labels: [__meta_kubernetes_pod_label_component] - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
regex: alertmanager regex: alertmanager
action: keep action: keep
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_probe]
regex: .*
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_number] - source_labels: [__meta_kubernetes_pod_container_port_number]
regex: "9093" regex: "9093"
action: keep action: keep

View File

@ -11,6 +11,17 @@ configMapGenerator:
behavior: merge behavior: merge
files: files:
- prometheus.yml=prometheus.yaml - prometheus.yml=prometheus.yaml
- alerting_rules.yml=prometheus-alerting-rules.yaml
options:
labels:
app: prometheus
- name: prometheus-alertmanager
behavior: merge
files:
- alertmanager.yml=secrets/prometheus-alertmanager.yaml
options:
labels:
app: prometheus
- name: grafana - name: grafana
behavior: merge behavior: merge
files: files:

View File

@ -0,0 +1,10 @@
# Prometheus alerting rules.
groups:
  - name: default-group
    rules:
      # Fires when the used share of the "/" filesystem reported by
      # prod-db:9100 has been above 85% for 10 minutes.
      - alert: DBRootFSUsed
        # Folded scalar: the lines below are joined with single spaces, which
        # yields exactly the original one-line expression.
        expr: >-
          100 - ((node_filesystem_avail_bytes{instance="prod-db:9100",job="node",mountpoint="/",fstype!="rootfs"}
          * 100)
          / node_filesystem_size_bytes{instance="prod-db:9100",job="node",mountpoint="/",fstype!="rootfs"})
          > 85
        for: 10m
        labels:
          severity: alert
        annotations:
          summary: database disk space

View File

@ -305,15 +305,12 @@ alerting:
- source_labels: [__meta_kubernetes_namespace] - source_labels: [__meta_kubernetes_namespace]
regex: default regex: default
action: keep action: keep
- source_labels: [__meta_kubernetes_pod_label_app] - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
regex: prometheus regex: prometheus
action: keep action: keep
- source_labels: [__meta_kubernetes_pod_label_component] - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
regex: alertmanager regex: alertmanager
action: keep action: keep
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_probe]
regex: .*
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_number] - source_labels: [__meta_kubernetes_pod_container_port_number]
regex: "9093" regex: "9093"
action: keep action: keep