# core-deployments/prometheus/values.yaml

# Alertmanager: two replicas, Telegram notifications, Traefik ingress with a
# basic-auth middleware.
alertmanager:
  alertmanagerSpec:
    # Set once, here. A second copy of this key that followed
    # servicePerReplica (outside alertmanagerSpec) was redundant and has been
    # removed.
    podAntiAffinity: "hard"
    replicas: 2
    # Referenced by the config below via
    # /etc/alertmanager/secrets/telegram-api/... and
    # /etc/alertmanager/configmaps/templates/...
    secrets:
      - "telegram-api"
    configMaps:
      - "templates"
    storage:
      volumeClaimTemplate:
        spec:
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 5Gi
    useExistingSecret: false

  config:
    global:
      resolve_timeout: 5m
    templates:
      - '/etc/alertmanager/configmaps/templates/telegram.tmpl'
    route:
      group_by: ['alertname']
      group_wait: 30s
      group_interval: 30s
      repeat_interval: 24h
      receiver: 'tg1'
      # Both child routes currently deliver to the same receiver as the
      # default route.
      routes:
        - matchers:
            - severity=warning
          receiver: 'tg1'
        - matchers:
            - severity=critical
          receiver: 'tg1'
    receivers:
      - name: tg1
        telegram_configs:
          - bot_token_file: '/etc/alertmanager/secrets/telegram-api/api_key'
            chat_id: -995270884
            api_url: "https://api.telegram.org"
            send_resolved: true
            parse_mode: "HTML"
            message: '{{ template "telegram.aaron" .}}'
    inhibit_rules:
      # A firing critical alert mutes warning/info alerts carrying the same
      # `node` label value.
      - source_matchers:
          - severity = critical
        target_matchers:
          # All matchers in one list must match the same alert, so
          # "warning OR info" has to be a single regex matcher. Two separate
          # equality matchers on `severity` can never both be true.
          - 'severity =~ "warning|info"'
        equal: ['node']
      # NOTE(review): no `equal` list here, so while KubeNodeUnreachable fires
      # every warning/info alert is muted regardless of node - confirm this
      # is intended.
      - source_matchers:
          - alertname = KubeNodeUnreachable
        target_matchers:
          - 'severity =~ "warning|info"'

  ingress:
    enabled: true
    hosts:
      - alertmanager.services.yolokube.de
    paths:
      - /
    annotations:
      traefik.ingress.kubernetes.io/router.middlewares: traefik-basic-auth@kubernetescrd

  ingressPerReplica:
    enabled: true
    hostPrefix: alertmanager
    hostDomain: services.yolokube.de
    pathType: ImplementationSpecific
    paths:
      - /
    annotations:
      traefik.ingress.kubernetes.io/router.middlewares: traefik-basic-auth@kubernetescrd

  servicePerReplica:
    enabled: true
# Grafana: exposed through an ingress, with persistence enabled.
grafana:
  defaultDashboardsTimezone: Europe/Berlin

  ingress:
    enabled: true
    hosts:
      - grafana.services.yolokube.de

  persistence:
    enabled: true
    # NOTE(review): ReadWriteMany needs a storage class that supports it -
    # confirm against the cluster's default class.
    accessModes:
      - ReadWriteMany
# node-exporter: enables its monitor, adds a `node` label, and sets the
# collector flags.
prometheus-node-exporter:
  prometheus:
    monitor:
      enabled: true
      relabelings:
        # Copy the endpoint's node name into a `node` label. The Alertmanager
        # inhibit rule in this file compares alerts on that label.
        - action: replace
          sourceLabels:
            - __meta_kubernetes_endpoint_node_name
          targetLabel: node

  extraArgs:
    # Filesystem collector: mount points and filesystem types to skip.
    - '--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)'
    - '--collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$'
    # Textfile collector directory, given as a path under /host/root.
    - '--collector.textfile.directory=/host/root/var/log/'
    - '--collector.ethtool'
# Prometheus: two replicas on local Longhorn volumes, Traefik ingress with a
# basic-auth middleware.
prometheus:
  # Declared once. The original repeated this key at the end of the section;
  # most parsers silently keep only the last occurrence of a duplicate key.
  servicePerReplica:
    enabled: true

  ingress:
    enabled: true
    hosts:
      - prometheus.services.yolokube.de
    paths:
      - /
    annotations:
      traefik.ingress.kubernetes.io/router.middlewares: traefik-basic-auth@kubernetescrd

  ingressPerReplica:
    enabled: true
    hostPrefix: prometheus
    hostDomain: services.yolokube.de
    pathType: ImplementationSpecific
    paths:
      - /
    annotations:
      traefik.ingress.kubernetes.io/router.middlewares: traefik-basic-auth@kubernetescrd

  prometheusSpec:
    replicas: 2
    podAntiAffinity: "hard"
    # Size-based retention cap, set below the 50Gi volume requested next.
    retentionSize: "45GB"
    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: longhorn-local
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 50Gi
    # Rules are selected from namespaces labelled prometheus=yolokube.
    ruleNamespaceSelector:
      matchLabels:
        prometheus: yolokube
    # NOTE(review): presumably set to false so objects not created by this
    # Helm release are also selected - confirm against the chart docs.
    ruleSelectorNilUsesHelmValues: false
    serviceMonitorSelectorNilUsesHelmValues: false
    podMonitorSelectorNilUsesHelmValues: false
    probeSelectorNilUsesHelmValues: false
    scrapeConfigSelectorNilUsesHelmValues: false
# Create the chart's default rules.
defaultRules:
  create: true
# Per-alert overrides of `for` and `severity`.
customRules:
  # This alert name is also the source matcher of an Alertmanager inhibit
  # rule in this file.
  KubeNodeUnreachable:
    for: 0m
    severity: "critical"