kube-prometheus: ensure alerts trigger when targets are down
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
ALERT K8SSchedulerDown
|
||||
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
|
||||
IF absent(up{job="kube-scheduler"} == 1)
|
||||
FOR 5m
|
||||
LABELS {
|
||||
severity = "critical",
|
||||
@@ -7,4 +7,5 @@ ALERT K8SSchedulerDown
|
||||
ANNOTATIONS {
|
||||
summary = "Scheduler is down",
|
||||
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
|
||||
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user