Prometheus: change DiskspaceLow Alert
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
400557ed0c
commit
a441ff630b
1 changed file with 4 additions and 12 deletions
|
@ -18,22 +18,14 @@ spec:
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Memory over 80%"
|
summary: "Memory over 80%"
|
||||||
description: "Memory on node {{ $labels.node }} is over 80% for more than 5 minutes. Plox fix. Memory usage: {{ $value }}%"
|
description: "Memory on node {{ $labels.node }} is over 80% for more than 5 minutes. Plox fix. Memory usage: {{ $value }}%"
|
||||||
- alert: DiskspaceLowWorker
|
- alert: DiskspaceLow
|
||||||
expr: round(node_filesystem_avail_bytes{mountpoint="/", node=~"worker.*"} / 1073742000, 0.1) < 25
|
expr: round(node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100, 1) < 5
|
||||||
for: 1m
|
for: 1m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Free disk space below 25 GB"
|
summary: "Free disk space at {{ $value }}%"
|
||||||
description: "Disk space on node {{ $labels.node }} is under 25 GB. Plox fix. Free Space: {{ $value }} GB on partition {{ $labels.device }}"
|
description: "Disk space on node {{ $labels.node }} is only {{ $value }}%. Plox fix. Partition: {{ $labels.device }}"
|
||||||
- alert: DiskspaceLowMaster
|
|
||||||
expr: round(node_filesystem_avail_bytes{mountpoint="/", node=~"master.*"} / 1073742000, 0.1) < 2
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Free disk space below 2 GB"
|
|
||||||
description: "Disk space on node {{ $labels.node }} is under 2 GB. Plox fix. Free Space: {{ $value }} GB on partition {{ $labels.device }}"
|
|
||||||
- alert: HostMemoryUnderMemoryPressure
|
- alert: HostMemoryUnderMemoryPressure
|
||||||
expr: rate(node_vmstat_pgmajfault[1m]) > 1000
|
expr: rate(node_vmstat_pgmajfault[1m]) > 1000
|
||||||
for: 2m
|
for: 2m
|
||||||
|
|
Loading…
Reference in a new issue