From a441ff630b657d5c6506c5a9670a73c781e9dc57 Mon Sep 17 00:00:00 2001
From: Aaron Riedel
Date: Thu, 23 Nov 2023 20:35:46 +0100
Subject: [PATCH] Prometheus: change DiskspaceLow Alert

---
Reviewer notes (this area is ignored when the patch is applied):
* Replaces the two absolute-size alerts (DiskspaceLowWorker, < 25 GB free and
  DiskspaceLowMaster, < 2 GB free on "/") with a single DiskspaceLow alert
  that fires when the free share of "/" is below 5 % on any node.
* The line structure of this patch was reconstructed from a copy whose
  newlines and indentation had been collapsed. The leading whitespace of the
  hunk lines is an assumption (TODO confirm against prometheus/alerts.yaml);
  if the context does not match, apply with `git apply --ignore-whitespace`.

 prometheus/alerts.yaml | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/prometheus/alerts.yaml b/prometheus/alerts.yaml
index d187232..d4cd13b 100644
--- a/prometheus/alerts.yaml
+++ b/prometheus/alerts.yaml
@@ -18,22 +18,14 @@ spec:
       annotations:
         summary: "Memory over 80%"
         description: "Memory on node {{ $labels.node }} is over 80% for more than 5 minutes. Plox fix. Memory usage: {{ $value }}%"
-    - alert: DiskspaceLowWorker
-      expr: round(node_filesystem_avail_bytes{mountpoint="/", node=~"worker.*"} / 1073742000, 0.1) < 25
+    - alert: DiskspaceLow
+      expr: round(node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100, 1) < 5
       for: 1m
       labels:
         severity: warning
       annotations:
-        summary: "Free disk space below 25 GB"
-        description: "Disk space on node {{ $labels.node }} is under 25 GB. Plox fix. Free Space: {{ $value }} GB on partition {{ $labels.device }}"
-    - alert: DiskspaceLowMaster
-      expr: round(node_filesystem_avail_bytes{mountpoint="/", node=~"master.*"} / 1073742000, 0.1) < 2
-      for: 1m
-      labels:
-        severity: warning
-      annotations:
-        summary: "Free disk space below 2 GB"
-        description: "Disk space on node {{ $labels.node }} is under 2 GB. Plox fix. Free Space: {{ $value }} GB on partition {{ $labels.device }}"
+        summary: "Free disk space at {{ $value }}%"
+        description: "Disk space on node {{ $labels.node }} is only {{ $value }}%. Plox fix. Partition: {{ $labels.device }}"
     - alert: HostMemoryUnderMemoryPressure
       expr: rate(node_vmstat_pgmajfault[1m]) > 1000
       for: 2m