37 lines
1.3 KiB
Plaintext
37 lines
1.3 KiB
Plaintext
# AlertmanagerConfigInconsistent
#
# Detects Alertmanager instances of one service that do not all report the
# same configuration hash.
#
# How the expression works:
#   * count_values buckets the alertmanager_config_hash samples of each
#     service by their value and yields the size of every bucket (the hash
#     itself is exposed in the "config_hash" label).
#   * label_replace copies the "alertmanager" label of
#     prometheus_operator_alertmanager_spec_replicas into a "service" label of
#     the form "alertmanager-<name>" so both sides can be matched on "service".
#     NOTE(review): presumably this metric is the desired replica count of the
#     cluster; confirm against the operator's metric documentation.
#   * The quotient differs from 1 whenever a bucket does not contain exactly
#     as many instances as the right-hand metric reports.
#
# The condition has to hold for 5 minutes before the alert fires.
ALERT AlertmanagerConfigInconsistent
  IF
      count_values("config_hash", alertmanager_config_hash) BY (service)
    / ON(service) GROUP_LEFT
      label_replace(
        prometheus_operator_alertmanager_spec_replicas,
        "service", "alertmanager-$1", "alertmanager", "(.*)"
      )
    != 1
  FOR 5m
  LABELS {
    severity = "critical"
  }
  ANNOTATIONS {
    description = "The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.",
    summary = "Alertmanager configurations are inconsistent"
  }
|
|
|
|
# AlertmanagerDownOrMissing
#
# Compares the value of prometheus_operator_alertmanager_spec_replicas with
# the number of targets per job that are currently up.
#
# How the expression works:
#   * label_replace copies the "alertmanager" label into a "job" label of the
#     form "alertmanager-<name>" so it can be matched against the scrape job.
#   * sum(up) BY (job) adds up the "up" samples of every job, i.e. the number
#     of targets of that job whose last scrape succeeded.
#   * The quotient differs from 1 whenever those two numbers disagree.
#     NOTE(review): presumably the left-hand metric is the desired replica
#     count; confirm against the operator's metric documentation.
#
# The condition has to hold for 5 minutes before the alert fires.
ALERT AlertmanagerDownOrMissing
  IF
      label_replace(
        prometheus_operator_alertmanager_spec_replicas,
        "job", "alertmanager-$1", "alertmanager", "(.*)"
      )
    / ON(job) GROUP_RIGHT
      sum(up) BY (job)
    != 1
  FOR 5m
  LABELS {
    severity = "warning"
  }
  ANNOTATIONS {
    description = "An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery.",
    summary = "Alertmanager down or not discovered"
  }
|
|
|
|
# FailedReload
#
# Fires for every series of alertmanager_config_last_reload_successful whose
# value is 0, once that has been the case for 10 minutes. The description
# identifies the affected instance through its "namespace" and "pod" labels.
ALERT FailedReload
  IF
    alertmanager_config_last_reload_successful == 0
  FOR 10m
  LABELS {
    severity = "warning"
  }
  ANNOTATIONS {
    description = "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.",
    summary = "Alertmanager configuration reload has failed"
  }
|