Add an "etcdbackup" alert group to prometheus/alerts.yaml.

The group carries two warning-severity rules:
  * "etcdbackup too old" fires when etcdbackup_time lies more than
    10800 seconds (3 h) before the current evaluation time.
  * "etcdbackup failed" fires when etcdbackup_result is greater than 0.

NOTE(review): this patch was recovered from a copy whose line breaks and
leading whitespace had been collapsed. The indentation below is
reconstructed on a 2-space, flush-sequence layout; confirm the context
lines against the target file before applying.

diff --git a/prometheus/alerts.yaml b/prometheus/alerts.yaml
index 947f5af..a0c9a2b 100644
--- a/prometheus/alerts.yaml
+++ b/prometheus/alerts.yaml
@@ -89,6 +89,22 @@ spec:
       annotations:
         summary: "SMARTcheck not running"
         description: 'The last SMARTcheck on server {{ $labels.node }} was more than 3h ago. Plox fix.'
+  - name: etcdbackup
+    rules:
+    - alert: "etcdbackup too old"
+      expr: (time() - etcdbackup_time) > 10800
+      labels:
+        severity: warning
+      annotations:
+        summary: "etcd backup not running"
+        description: 'The last etcd backup on node {{ $labels.node }} was more than 3h ago. Plox fix.'
+    - alert: "etcdbackup failed"
+      expr: etcdbackup_result > 0
+      labels:
+        severity: warning
+      annotations:
+        summary: "etcdbackup failed"
+        description: "The backup script for etcd failed on node {{ $labels.node }}. Plox fix."
   - name: kubernetes
     rules:
     - alert: KubernetesUnhealthyPod