From a7578bf43068e60ec3da79a7bcf4b73db5b4642c Mon Sep 17 00:00:00 2001 From: Aaron Riedel Date: Fri, 21 Apr 2023 07:53:09 +0200 Subject: [PATCH 1/2] edit in alert PrometheusTargetMissing (again) --- prometheus/config-map.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus/config-map.yaml b/prometheus/config-map.yaml index 1f0444f..44558a1 100644 --- a/prometheus/config-map.yaml +++ b/prometheus/config-map.yaml @@ -53,7 +53,7 @@ data: severity: critical annotations: summary: Prometheus target missing (instance {{ $labels.instance }}) - description: "A Prometheus target has disappeared. \n Job: {{ $labels.job }}\n App: {{ $labels.app }}\n Pod: {{ $labels.pod }}\n Node: {{ $labels.node }}\n Namespace: {{ $labels.namespace }}" + description: "A Prometheus target has disappeared. {{if ne $labels.job \"\"}}\n Job: {{ $labels.job }}{{end}}{{if ne $labels.app \"\"}}\n App: {{ $labels.app }}{{end}}{{if ne $labels.pod \"\"}}\n Pod: {{ $labels.pod }}{{end}}{{if ne $labels.node \"\"}}\n Node: {{ $labels.node }}{{end}}{{if ne $labels.namespace \"\"}}\n Namespace: {{ $labels.namespace }}{{end}}" - name: PrometheusConfigurationReloadFailure rules: - alert: PrometheusConfigurationReloadFailure From 414b7d931893915242f5d810f761c0951df7c8ac Mon Sep 17 00:00:00 2001 From: Aaron Riedel Date: Fri, 21 Apr 2023 07:59:28 +0200 Subject: [PATCH 2/2] add alert for unhealthy pod --- prometheus/config-map.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/prometheus/config-map.yaml b/prometheus/config-map.yaml index 44558a1..0ff221f 100644 --- a/prometheus/config-map.yaml +++ b/prometheus/config-map.yaml @@ -44,6 +44,16 @@ data: annotations: summary: "Free disk space below 2 GB" description: "Disk space on server {{ $labels.node }} is under 2 GB. Plox fix. 
Free Space: {{ $value }} GB on partition {{ $labels.device }}" + - name: KubernetesUnhealthyPod + rules: + - alert: KubernetesUnhealthyPod + expr: kube_pod_container_status_waiting_reason == 1 + for: 5m + labels: + severity: warning + annotations: + summary: "The Pod {{ $labels.pod }} is {{ $labels.reason }}" + description: "The Pod {{ $labels.pod }} is in the state {{ $labels.reason }} for more than 5m. The Pod is in namespace {{ $labels.namespace }} and on node {{ $labels.node }}." - name: PrometheusTargetMissing rules: - alert: PrometheusTargetMissing