34 lines
1.3 KiB
YAML
34 lines
1.3 KiB
YAML
groups:
|
|
- name: alertmanager.rules
|
|
rules:
|
|
- alert: AlertmanagerConfigInconsistent
|
|
expr: count_values("config_hash", alertmanager_config_hash) BY (service) / ON(service)
|
|
GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas, "service",
|
|
"alertmanager-$1", "alertmanager", "(.*)") != 1
|
|
for: 5m
|
|
labels:
|
|
severity: critical
|
|
annotations:
|
|
description: The configuration of the instances of the Alertmanager cluster
|
|
`{{$labels.service}}` are out of sync.
|
|
summary: Configuration out of sync
|
|
- alert: AlertmanagerDownOrMissing
|
|
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
|
|
"alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
|
|
for: 5m
|
|
labels:
|
|
severity: warning
|
|
annotations:
|
|
description: An unexpected number of Alertmanagers are scraped or Alertmanagers
|
|
disappeared from discovery.
|
|
summary: Alertmanager down or missing
|
|
- alert: AlertmanagerFailedReload
|
|
expr: alertmanager_config_last_reload_successful == 0
|
|
for: 10m
|
|
labels:
|
|
severity: warning
|
|
annotations:
|
|
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
|
|
}}/{{ $labels.pod}}.
|
|
summary: Alertmanager's configuration reload failed
|