From f2227fc61dd82de5e0c6f674c05d839c6ff7209d Mon Sep 17 00:00:00 2001
From: Rob Watson
Date: Tue, 18 Jul 2023 09:32:46 +0200
Subject: [PATCH] feat: prometheus alerting

---
Reviewer notes (text between the "---" marker and the first "diff --git"
line is not part of the commit message):

* dev and prod: the configMapGenerator entry that ships prometheus.yml
  also ships alerting_rules.yml, and a new prometheus-alertmanager entry
  supplies alertmanager.yml. dev reads it from an in-tree file; prod reads
  it from secrets/prometheus-alertmanager.yaml.
* dev rules are an empty mapping. prod defines one alert, DBRootFSUsed,
  whose expression is "used % of / on prod-db:9100 > 85", held for 10m.
* Alertmanager pod discovery now keeps pods by the
  app_kubernetes_io_instance / app_kubernetes_io_name pod labels instead
  of app / component, and no longer filters on the prometheus_io_probe
  annotation.
* NOTE(review): the dev alertmanager file carries inline SMTP credentials
  (test@example.com / foobar). They look like placeholders - confirm no
  real secret is committed.
* NOTE(review): the line structure and YAML indentation of this patch were
  reconstructed from a whitespace-collapsed copy. Hunk line counts match
  the "@@" headers, but verify the indentation against the repository
  before applying.

 deploy/dev/kustomization.yaml              | 11 +++++++++++
 deploy/dev/prometheus-alerting-rules.yaml  |  1 +
 deploy/dev/prometheus-alertmanager.yaml    | 17 +++++++++++++++++
 deploy/dev/prometheus.yaml                 |  7 ++-----
 deploy/prod/kustomization.yaml             | 11 +++++++++++
 deploy/prod/prometheus-alerting-rules.yaml | 10 ++++++++++
 deploy/prod/prometheus.yaml                |  7 ++-----
 7 files changed, 54 insertions(+), 10 deletions(-)
 create mode 100644 deploy/dev/prometheus-alerting-rules.yaml
 create mode 100644 deploy/dev/prometheus-alertmanager.yaml
 create mode 100644 deploy/prod/prometheus-alerting-rules.yaml

diff --git a/deploy/dev/kustomization.yaml b/deploy/dev/kustomization.yaml
index 757879e..656eaa1 100644
--- a/deploy/dev/kustomization.yaml
+++ b/deploy/dev/kustomization.yaml
@@ -22,6 +22,17 @@ configMapGenerator:
   behavior: merge
   files:
   - prometheus.yml=prometheus.yaml
+  - alerting_rules.yml=prometheus-alerting-rules.yaml
+  options:
+    labels:
+      app: prometheus
+- name: prometheus-alertmanager
+  behavior: merge
+  files:
+  - alertmanager.yml=prometheus-alertmanager.yaml
+  options:
+    labels:
+      app: prometheus
 - name: grafana
   behavior: merge
   files:
diff --git a/deploy/dev/prometheus-alerting-rules.yaml b/deploy/dev/prometheus-alerting-rules.yaml
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/deploy/dev/prometheus-alerting-rules.yaml
@@ -0,0 +1 @@
+{}
diff --git a/deploy/dev/prometheus-alertmanager.yaml b/deploy/dev/prometheus-alertmanager.yaml
new file mode 100644
index 0000000..cc42f3e
--- /dev/null
+++ b/deploy/dev/prometheus-alertmanager.yaml
@@ -0,0 +1,17 @@
+global: {}
+receivers:
+- name: mailgun-receiver
+  email_configs:
+  - to: alerts@netflux.io
+    from: alerts@netflux.io
+    smarthost: smtp.eu.mailgun.org:587
+    auth_username: test@example.com
+    auth_password: foobar
+    send_resolved: true
+route:
+  group_interval: 5m
+  group_wait: 10s
+  receiver: mailgun-receiver
+  repeat_interval: 3h
+templates:
+- /etc/alertmanager/*.tmpl
diff --git a/deploy/dev/prometheus.yaml b/deploy/dev/prometheus.yaml
index d289c38..72ccdf2 100644
--- a/deploy/dev/prometheus.yaml
+++ b/deploy/dev/prometheus.yaml
@@ -296,15 +296,12 @@ alerting:
     - source_labels: [__meta_kubernetes_namespace]
       regex: default
       action: keep
-    - source_labels: [__meta_kubernetes_pod_label_app]
+    - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
       regex: prometheus
       action: keep
-    - source_labels: [__meta_kubernetes_pod_label_component]
+    - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
       regex: alertmanager
       action: keep
-    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_probe]
-      regex: .*
-      action: keep
     - source_labels: [__meta_kubernetes_pod_container_port_number]
       regex: "9093"
       action: keep
diff --git a/deploy/prod/kustomization.yaml b/deploy/prod/kustomization.yaml
index 4b4b766..7aca8a2 100644
--- a/deploy/prod/kustomization.yaml
+++ b/deploy/prod/kustomization.yaml
@@ -11,6 +11,17 @@ configMapGenerator:
   behavior: merge
   files:
   - prometheus.yml=prometheus.yaml
+  - alerting_rules.yml=prometheus-alerting-rules.yaml
+  options:
+    labels:
+      app: prometheus
+- name: prometheus-alertmanager
+  behavior: merge
+  files:
+  - alertmanager.yml=secrets/prometheus-alertmanager.yaml
+  options:
+    labels:
+      app: prometheus
 - name: grafana
   behavior: merge
   files:
diff --git a/deploy/prod/prometheus-alerting-rules.yaml b/deploy/prod/prometheus-alerting-rules.yaml
new file mode 100644
index 0000000..4b4dd04
--- /dev/null
+++ b/deploy/prod/prometheus-alerting-rules.yaml
@@ -0,0 +1,10 @@
+groups:
+- name: default-group
+  rules:
+  - alert: DBRootFSUsed
+    expr: 100 - ((node_filesystem_avail_bytes{instance="prod-db:9100",job="node",mountpoint="/",fstype!="rootfs"} * 100) / node_filesystem_size_bytes{instance="prod-db:9100",job="node",mountpoint="/",fstype!="rootfs"}) > 85
+    for: 10m
+    labels:
+      severity: alert
+    annotations:
+      summary: database disk space
diff --git a/deploy/prod/prometheus.yaml b/deploy/prod/prometheus.yaml
index c2a7260..3302944 100644
--- a/deploy/prod/prometheus.yaml
+++ b/deploy/prod/prometheus.yaml
@@ -305,15 +305,12 @@ alerting:
     - source_labels: [__meta_kubernetes_namespace]
       regex: default
       action: keep
-    - source_labels: [__meta_kubernetes_pod_label_app]
+    - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance]
       regex: prometheus
       action: keep
-    - source_labels: [__meta_kubernetes_pod_label_component]
+    - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
       regex: alertmanager
       action: keep
-    - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_probe]
-      regex: .*
-      action: keep
     - source_labels: [__meta_kubernetes_pod_container_port_number]
       regex: "9093"
       action: keep