kube-prometheus: add alerting rules
This commit is contained in:
36
assets/prometheus/rules/alertmanager.rules
Normal file
36
assets/prometheus/rules/alertmanager.rules
Normal file
@@ -0,0 +1,36 @@
|
||||
ALERT AlertmanagerConfigInconsistent
|
||||
IF count_values by (service) ("config_hash", alertmanager_config_hash)
|
||||
/ on(service) group_left
|
||||
label_replace(prometheus_operator_alertmanager_spec_replicas, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
|
||||
FOR 5m
|
||||
LABELS {
|
||||
severity = "critical"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "Alertmanager configurations are inconsistent",
|
||||
description = "The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync."
|
||||
}
|
||||
|
||||
ALERT AlertmanagerDownOrMissing
|
||||
IF label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1", "alertmanager", "(.*)")
|
||||
/ on(job) group_right
|
||||
sum by(job) (up) != 1
|
||||
FOR 5m
|
||||
LABELS {
|
||||
severity = "warning"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "Alertmanager down or not discovered",
|
||||
description = "An unexpected number of Alertmanagers are scraped or Alertmanagers disappeared from discovery."
|
||||
}
|
||||
|
||||
ALERT FailedReload
|
||||
IF alertmanager_config_last_reload_successful == 0
|
||||
FOR 10m
|
||||
LABELS {
|
||||
severity = "warning"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "Alertmanager configuration reload has failed",
|
||||
description = "Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}."
|
||||
}
|
||||
Reference in New Issue
Block a user