add etcdbackup alerts

This commit is contained in:
Aaron Riedel 2023-06-22 19:59:06 +02:00
parent d75cb6b7b6
commit fd6cc7ef3d
Signed by: aaron
GPG key ID: 643004654D40D577

View file

@@ -89,6 +89,22 @@ spec:
annotations: annotations:
summary: "SMARTcheck not running" summary: "SMARTcheck not running"
description: 'The last SMARTcheck on server {{ $labels.node }} was more than 3h ago. Plox fix.' description: 'The last SMARTcheck on server {{ $labels.node }} was more than 3h ago. Plox fix.'
- name: etcdbackup
rules:
- alert: "etcdbackup too old"
expr: (time() - etcdbackup_time) > 10800
labels:
severity: warning
annotations:
summary: "etcd backup not running"
description: 'The last etcd backup on node {{ $labels.node }} was more than 3h ago. Plox fix.'
- alert: "etcdbackup failed"
expr: etcdbackup_result > 0
labels:
severity: warning
annotations:
summary: "etcdbackup failed"
description: "The backup script for etcd failed on node {{ $labels.node }}. Plox fix."
- name: kubernetes - name: kubernetes
rules: rules:
- alert: KubernetesUnhealthyPod - alert: KubernetesUnhealthyPod