Files
kube-prometheus/assets/prometheus/rules/alertmanager.rules.yaml

34 lines
1.3 KiB
YAML

groups:
- name: alertmanager.rules
rules:
- alert: AlertmanagerConfigInconsistent
expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service)
GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas, "service",
"alertmanager-$1", "alertmanager", "(.*)") != 1
for: 5m
labels:
severity: critical
annotations:
description: The configuration of the instances of the Alertmanager cluster
`{{$labels.service}}` are out of sync.
summary: Configuration out of sync
- alert: AlertmanagerDownOrMissing
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
"alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
for: 5m
labels:
severity: warning
annotations:
description: An unexpected number of Alertmanagers are scraped or Alertmanagers
disappeared from discovery.
summary: Alertmanager down or missing
- alert: AlertmanagerFailedReload
expr: alertmanager_config_last_reload_successful == 0
for: 10m
labels:
severity: warning
annotations:
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
summary: Alertmanager's configuration reload failed