kube-prometheus: ensure alerts fire when targets are down

2017-06-28 10:50:17 +02:00
parent 4c42ab4fcc
commit a5533a4f6c
7 changed files with 32 additions and 28 deletions
--- a/assets/prometheus/rules/kube-controller-manager.rules
+++ b/assets/prometheus/rules/kube-controller-manager.rules
@@ -1,5 +1,5 @@
 ALERT K8SControllerManagerDown
-  IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
+  IF absent(up{job="kube-controller-manager"} == 1)
  FOR 5m
  LABELS {
    severity = "critical",
@@ -7,4 +7,5 @@ ALERT K8SControllerManagerDown
  ANNOTATIONS {
    summary = "Controller manager is down",
    description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
+    runbook = "https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-controller-manager",
  }