kube-prometheus: add alerting rules
This commit is contained in:
38
assets/prometheus/rules/kube-apiserver.rules
Normal file
38
assets/prometheus/rules/kube-apiserver.rules
Normal file
@@ -0,0 +1,38 @@
|
||||
ALERT K8SApiserverDown
|
||||
IF up{job="apiserver"} == 0
|
||||
FOR 15m
|
||||
LABELS {
|
||||
severity = "warning"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "API server unreachable",
|
||||
description = "An API server could not be scraped.",
|
||||
}
|
||||
|
||||
# Disable for non HA kubernetes setups.
|
||||
ALERT K8SApiserverDown
|
||||
IF absent({job="apiserver"}) or (count by(cluster) (up{job="apiserver"} == 1) < count by(cluster) (up{job="apiserver"}))
|
||||
FOR 5m
|
||||
LABELS {
|
||||
severity = "critical"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "API server unreachable",
|
||||
description = "Prometheus failed to scrape multiple API servers, or all API servers have disappeared from service discovery.",
|
||||
}
|
||||
|
||||
# Some verbs excluded because they are expected to be long-lasting:
|
||||
# WATCHLIST is long-poll, CONNECT is `kubectl exec`.
|
||||
ALERT K8SApiServerLatency
|
||||
IF histogram_quantile(
|
||||
0.99,
|
||||
sum without (instance,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST|WATCH"})
|
||||
) / 1e6 > 1.0
|
||||
FOR 10m
|
||||
LABELS {
|
||||
severity = "warning"
|
||||
}
|
||||
ANNOTATIONS {
|
||||
summary = "Kubernetes apiserver latency is high",
|
||||
description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.",
|
||||
}
|
||||
Reference in New Issue
Block a user