Files
kube-prometheus/assets/prometheus/rules/kube-apiserver.rules
2017-06-06 15:22:28 +02:00

27 lines
939 B
Plaintext

ALERT K8SApiserverDown
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "API server unreachable",
description = "Prometheus failed to scrape API server(s), or all API servers have disappeared from service discovery.",
}
# Some verbs excluded because they are expected to be long-lasting:
# WATCHLIST is long-poll, CONNECT is `kubectl exec`.
ALERT K8SApiServerLatency
IF histogram_quantile(
0.99,
sum without (instance,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH|PROXY"})
) / 1e6 > 1.0
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Kubernetes apiserver latency is high",
description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.",
}