diff --git a/deploy/base/inflated/grafana/templates/configmap.yaml b/deploy/base/inflated/grafana/templates/configmap.yaml index 0b1e691..def782c 100644 --- a/deploy/base/inflated/grafana/templates/configmap.yaml +++ b/deploy/base/inflated/grafana/templates/configmap.yaml @@ -35,6 +35,8 @@ data: url: http://prometheus-server contactpoints.yaml: | apiVersion: 1 + rules.yaml: | + apiVersion: 1 dashboardproviders.yaml: | apiVersion: 1 providers: diff --git a/deploy/base/inflated/grafana/templates/deployment.yaml b/deploy/base/inflated/grafana/templates/deployment.yaml index 5da1cc7..a8ca33a 100644 --- a/deploy/base/inflated/grafana/templates/deployment.yaml +++ b/deploy/base/inflated/grafana/templates/deployment.yaml @@ -26,7 +26,7 @@ spec: app.kubernetes.io/name: grafana app.kubernetes.io/instance: grafana annotations: - checksum/config: 36a36abf9dd9e61eaa035cfc90acbb82d3e6c131aa9fd57eaf98ae5380401bf3 + checksum/config: 008eb6b5d7e1de9723209fca089750d0b54ffa5b829c51598d8def8d878c44c5 checksum/dashboards-json-config: 2b3b91b055108de2da8951a904e7c7ea49b5a5a250d2649ba27b7b7b7ec34cfd checksum/sc-dashboard-provider-config: 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b kubectl.kubernetes.io/default-container: grafana @@ -83,6 +83,9 @@ spec: - name: config mountPath: "/etc/grafana/provisioning/alerting/contactpoints.yaml" subPath: "contactpoints.yaml" + - name: config + mountPath: "/etc/grafana/provisioning/alerting/rules.yaml" + subPath: "rules.yaml" - name: config mountPath: "/etc/grafana/provisioning/dashboards/dashboardproviders.yaml" subPath: "dashboardproviders.yaml" diff --git a/deploy/base/values/grafana.yaml b/deploy/base/values/grafana.yaml index ef38a47..a83968b 100644 --- a/deploy/base/values/grafana.yaml +++ b/deploy/base/values/grafana.yaml @@ -22,6 +22,8 @@ datasources: alerting: contactpoints.yaml: apiVersion: 1 + rules.yaml: + apiVersion: 1 dashboardProviders: dashboardproviders.yaml: apiVersion: 1 diff --git a/deploy/prod/grafana-rules.yaml b/deploy/prod/grafana-rules.yaml new file mode 100644 index 0000000..06aa0e9 --- /dev/null +++ b/deploy/prod/grafana-rules.yaml @@ -0,0 +1,288 @@ +apiVersion: 1 +groups: +- name: rules.yaml + interval: 60s + folder: Solar + rules: + - id: 2 + uid: c40e8d57-9d65-4a28-8485-a46b810c033e + orgID: 1 + folderUID: ded7fd24-65bf-4e04-95ec-0970287687cb + ruleGroup: every_minute + title: solar_grid_mode_unexpected_value + condition: B + data: + - refId: A + queryType: "" + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: P0A2ACEDBDFD04F7F + model: + datasource: + type: postgres + uid: P0A2ACEDBDFD04F7F + editorMode: code + format: table + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + rawQuery: true + rawSql: SELECT grid_mode FROM et_runtime_data ORDER BY "timestamp" DESC LIMIT 1 + refId: A + sql: + columns: + - parameters: + - name: grid_mode + type: functionParameter + type: function + groupBy: + - property: + type: string + type: groupBy + limit: 5 + orderBy: + property: + name: '"timestamp"' + type: string + type: property + orderByDirection: ASC + whereJsonTree: + children1: + - id: a9b888a9-0123-4456-b89a-b18a6c43c585 + properties: + field: null + operator: null + value: [] + valueSrc: [] + type: rule + id: bb8b9bba-89ab-4cde-b012-318a673c506d + type: group + table: et_runtime_data + - refId: B + queryType: "" + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 1 + - 1 + type: outside_range + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: last + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: "" + intervalMs: 1000 + maxDataPoints: 43200 + refId: B + type: classic_conditions + updated: "2023-09-06T20:55:12Z" + noDataState: Alerting + execErrState: Error + for: 5m + isPaused: false + - id: 4 + uid: e6c41854-ace4-4149-9aea-dfb74454a496 + orgID: 1 + folderUID: ded7fd24-65bf-4e04-95ec-0970287687cb + ruleGroup: every_minute + title: solar_temperature_high + condition: B + data: + - refId: A + queryType: "" + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: P0A2ACEDBDFD04F7F + model: + datasource: + type: postgres + uid: P0A2ACEDBDFD04F7F + editorMode: code + format: table + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + rawQuery: true + rawSql: 'SELECT temperature FROM et_runtime_data WHERE timestamp > (NOW() - ''15 minutes''::interval) ORDER BY "timestamp" DESC LIMIT 50 ' + refId: A + sql: + columns: + - parameters: + - name: temperature + type: functionParameter + type: function + groupBy: + - property: + type: string + type: groupBy + limit: 50 + orderBy: + property: + name: '"timestamp"' + type: string + type: property + orderByDirection: DESC + whereJsonTree: + children1: + - id: 8bba9888-89ab-4cde-b012-318a70ec0037 + properties: + field: '"timestamp"' + operator: equal + value: + - null + valueSrc: + - value + valueType: + - datetime + type: rule + id: bbb98b9a-89ab-4cde-b012-318a70eb5f5f + type: group + table: et_runtime_data + - refId: B + queryType: "" + relativeTimeRange: + from: 0 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 60 + - 0 + type: gt + operator: + type: and + query: + params: + - A + reducer: + params: [] + type: avg + type: query + datasource: + name: Expression + type: __expr__ + uid: __expr__ + expression: "" + intervalMs: 1000 + maxDataPoints: 43200 + refId: B + type: classic_conditions + updated: "2023-09-07T18:37:57Z" + noDataState: NoData + execErrState: Error + for: 5m + isPaused: false +- name: rules.yaml + interval: 60s + folder: Nodes (General) + rules: + - id: 6 + uid: a33b6255-4262-4924-bc25-99893d3e6d2c + orgID: 1 + folderUID: b2d32456-52c2-456e-8906-4652925c88c6 + ruleGroup: every_minute + title: db_disk_utilization_high + condition: C + data: + - refId: A + queryType: "" + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + expr: 100 - ((node_filesystem_avail_bytes{instance="prod-db:9100",job="node",device!~'rootfs',mountpoint="/mnt/volume_db"} * 100) / node_filesystem_size_bytes{instance="prod-db:9100",job="node",device!~'rootfs',mountpoint="/mnt/volume_db"}) + hide: false + instant: true + intervalMs: 1000 + maxDataPoints: 43200 + range: false + refId: A + - refId: B + queryType: "" + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - B + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + reducer: mean + refId: B + type: reduce + - refId: C + queryType: "" + relativeTimeRange: + from: 600 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 85 + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: B + hide: false + intervalMs: 1000 + maxDataPoints: 43200 + refId: C + type: threshold + updated: "2023-09-07T18:50:52Z" + noDataState: NoData + execErrState: Error + for: 5m + isPaused: false + diff --git a/deploy/prod/kustomization.yaml b/deploy/prod/kustomization.yaml index 601a0f8..af1fce8 100644 --- a/deploy/prod/kustomization.yaml +++ b/deploy/prod/kustomization.yaml @@ -28,6 +28,7 @@ configMapGenerator: - grafana.ini=secrets/grafana-config.ini - datasources.yaml=secrets/grafana-datasources.yaml - contactpoints.yaml=grafana-contactpoints.yaml + - rules.yaml=grafana-rules.yaml - name: invidious-config files: - config.yml=invidious-config.yaml diff --git a/deploy/prod/prometheus-alerting-rules.yaml b/deploy/prod/prometheus-alerting-rules.yaml index 4b4dd04..2ae2220 100644 --- a/deploy/prod/prometheus-alerting-rules.yaml +++ b/deploy/prod/prometheus-alerting-rules.yaml @@ -1,10 +1 @@ -groups: -- name: default-group - rules: - - alert: DBRootFSUsed - expr: 100 - ((node_filesystem_avail_bytes{instance="prod-db:9100",job="node",mountpoint="/",fstype!="rootfs"} * 100) / node_filesystem_size_bytes{instance="prod-db:9100",job="node",mountpoint="/",fstype!="rootfs"}) > 85 - for: 10m - labels: - severity: alert - annotations: - summary: database disk space +groups: []