kube-prometheus: ensure triggering alerts on down targets

This commit is contained in:
Frederic Branczyk
2017-06-28 10:50:17 +02:00
parent 4c42ab4fcc
commit a5533a4f6c
7 changed files with 32 additions and 28 deletions

View File

@@ -1,5 +1,5 @@
ALERT K8SSchedulerDown
-IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
+IF absent(up{job="kube-scheduler"} == 1)
FOR 5m
LABELS {
severity = "critical",
@@ -7,4 +7,5 @@ ALERT K8SSchedulerDown
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler",
}