kube-prometheus: add alerting rules

This commit is contained in:
Frederic Branczyk
2017-05-27 10:44:33 +02:00
parent f0851d5e4d
commit c4b382be6f
12 changed files with 828 additions and 598 deletions

View File

@@ -0,0 +1,38 @@
# Warning-level alert: a scrape target of the "apiserver" job has reported
# up == 0 continuously for 15 minutes.
ALERT K8SApiserverDown
  IF up{job="apiserver"} == 0
  FOR 15m
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary = "API server unreachable",
    description = "An API server could not be scraped.",
  }
# Critical-level alert: the API server tier as a whole is degraded for 5 minutes.
# Fires when either
#   * no "apiserver" target is up -- every target is failing its scrape, or all
#     targets have disappeared from service discovery; or
#   * within a cluster, fewer API servers are up than are being scraped.
# absent() wraps `up{...} == 1` instead of a bare `{job="apiserver"}` selector:
# targets that are still discovered but down keep exporting `up == 0`, so the
# bare selector is never absent, and the count comparison below returns nothing
# when its left-hand side is empty. Without this, a total outage went unnoticed.
# Disable for non-HA Kubernetes setups.
ALERT K8SApiserverDown
  IF absent(up{job="apiserver"} == 1)
    or (
      count by(cluster) (up{job="apiserver"} == 1)
        < count by(cluster) (up{job="apiserver"})
    )
  FOR 5m
  LABELS {
    severity = "critical"
  }
  ANNOTATIONS {
    summary = "API server unreachable",
    description = "Prometheus failed to scrape one or more API servers, or all API servers are down or have disappeared from service discovery.",
  }
# Warning-level alert: the 99th percentile API server request latency has been
# above 1s for 10 minutes. Buckets are summed across instances and resources,
# so the quantile is computed per remaining label set (including `verb`).
# Some verbs are excluded because they are expected to be long-lasting:
# WATCH and WATCHLIST are long-polls, CONNECT is `kubectl exec`.
# The buckets are wrapped in rate() so the quantile describes the last 5
# minutes of traffic; on raw cumulative counters it would describe every
# request since each API server started and barely react to new regressions.
# `/ 1e6` scales the quantile so it can be compared against the 1s threshold
# named in the description (i.e. the buckets are treated as microseconds).
ALERT K8SApiServerLatency
  IF histogram_quantile(
      0.99,
      sum without (instance,resource) (
        rate(apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"}[5m])
      )
    ) / 1e6 > 1.0
  FOR 10m
  LABELS {
    severity = "warning"
  }
  ANNOTATIONS {
    summary = "Kubernetes apiserver latency is high",
    description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.",
  }