61 lines
1.8 KiB
Plaintext
61 lines
1.8 KiB
Plaintext
# Per-node warning: fires for each series of the Ready condition's
# status="true" variant whose sample value has been 0 for a full hour.
ALERT K8SNodeNotReady
  IF kube_node_status_condition{status="true", condition="Ready"} == 0
  FOR 1h
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary     = "Node status is NotReady",
    description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour",
  }
|
|
|
|
# Cluster-wide critical alert: fires when more than one node is NotReady
# AND the NotReady nodes make up more than 20% of all nodes reporting the
# Ready condition, sustained for one minute.
#
# `AND` keeps the samples of its left-hand operand, so {{ $value }} in the
# annotations is the absolute number of NotReady nodes, not the ratio.
ALERT K8SManyNodesNotReady
  IF
    count(kube_node_status_condition{condition="Ready", status="true"} == 0) > 1
    AND
      (
        count(kube_node_status_condition{condition="Ready", status="true"} == 0)
      /
        count(kube_node_status_condition{condition="Ready", status="true"})
      ) > 0.2
  FOR 1m
  LABELS {
    severity = "critical",
  }
  ANNOTATIONS {
    summary = "Many Kubernetes nodes are Not Ready",
    # The percentage quoted here must track the ratio threshold (0.2) above.
    description = "{{ $value }} Kubernetes nodes (more than 20%) are in the NotReady state.",
  }
|
|
|
|
# Warning tier of K8SKubeletDown: fires when more than 3% of the kubelet
# scrape targets have been failing (up == 0) for an hour.
#
# The ratio is multiplied by 100 so that {{ $value }} really is a percentage,
# matching the "%" sign in the description; the firing condition is the same
# as comparing the raw ratio against 0.03.
ALERT K8SKubeletDown
  IF count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 3
  FOR 1h
  LABELS {
    severity = "warning",
  }
  ANNOTATIONS {
    summary = "Many Kubelets cannot be scraped",
    description = "Prometheus failed to scrape {{ $value }}% of kubelets.",
  }
|
|
|
|
# Critical tier of K8SKubeletDown: fires when, for an hour, either
#   * no kubelet target with up == 1 exists at all (all down, or all gone
#     from service discovery), or
#   * more than 10% of the kubelet scrape targets are failing (up == 0).
#
# Both branches are expressed as a percentage so that {{ $value }} agrees
# with the "%" sign in the description: absent() yields 1, scaled here to
# 100, and the failure ratio is multiplied by 100 before being compared.
# Comparison binds tighter than `or`; the parentheses only make that explicit.
ALERT K8SKubeletDown
  IF
      (absent(up{job="kubelet"} == 1) * 100)
    or
      (count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 10)
  FOR 1h
  LABELS {
    severity = "critical",
  }
  ANNOTATIONS {
    summary = "Many Kubelets cannot be scraped",
    description = "Prometheus failed to scrape {{ $value }}% of kubelets, or all Kubelets have disappeared from service discovery.",
  }
|
|
|
|
# Per-kubelet warning: fires as soon as a kubelet reports more than 100
# running pods (there is no FOR clause, so no hold-off period).
# The description text quotes 110 as the pod limit being approached.
ALERT K8SKubeletTooManyPods
  IF (kubelet_running_pod_count > 100)
  LABELS { severity = "warning" }
  ANNOTATIONS {
    summary     = "Kubelet is close to pod limit",
    description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110",
  }
|