# kube-prometheus/assets/prometheus/rules/kubelet.rules.yaml

# Prometheus rule group "kubelet.rules": alerts on node readiness, kubelet
# scrape health and per-kubelet pod counts.
groups:
- name: kubelet.rules
  rules:
  # Warning: a node's Ready condition has been reported as not true for an hour.
  - alert: K8SNodeNotReady
    # Selects every Ready/status="true" condition series whose value is 0.
    expr: >-
      kube_node_status_condition{condition="Ready",status="true"} == 0
    for: 1h
    labels:
      severity: warning
    annotations:
      summary: Node status is NotReady
      description: >-
        The Kubelet on {{ $labels.node }} has not checked in with the API,
        or has set itself to NotReady, for more than an hour
  # Critical: more than one node AND more than 20% of all nodes are not Ready.
  - alert: K8SManyNodesNotReady
    # The percentage comparison is deliberately the LEFT operand of `and`:
    # PromQL's `and` keeps the sample values of its left-hand side, so $value
    # in the description is the percentage of not-ready nodes (matching the
    # "%" in the message) rather than the raw count of not-ready nodes.
    # The right operand only gates the alert on at least two nodes being down.
    expr: >-
      (count(kube_node_status_condition{condition="Ready",status="true"} == 0)
      / count(kube_node_status_condition{condition="Ready",status="true"})) * 100 > 20
      and
      count(kube_node_status_condition{condition="Ready",status="true"} == 0) > 1
    for: 1m
    labels:
      severity: critical
    annotations:
      description: '{{ $value }}% of Kubernetes nodes are not ready'
      summary: Many Kubernetes nodes are not ready
  # Warning tier of K8SKubeletDown: over 3% of kubelet targets have up == 0
  # for an hour. $value is the percentage computed by the expression.
  - alert: K8SKubeletDown
    expr: >-
      count(up{job="kubelet"} == 0) / count(up{job="kubelet"}) * 100 > 3
    for: 1h
    labels:
      severity: warning
    annotations:
      summary: Prometheus failed to scrape
      description: 'Prometheus failed to scrape {{ $value }}% of kubelets.'
  # Critical tier of K8SKubeletDown: over 10% of kubelet targets have up == 0,
  # or no kubelet target with up == 1 exists at all, for an hour.
  - alert: K8SKubeletDown
    # absent(...) yields 1 when no series matches up{job="kubelet"} == 1, which
    # scales to 100 and therefore also exceeds the threshold.
    expr: >-
      (absent(up{job="kubelet"} == 1)
      or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
      * 100 > 10
    for: 1h
    labels:
      severity: critical
    annotations:
      summary: Many Kubelets cannot be scraped
      description: >-
        Prometheus failed to scrape {{ $value }}% of kubelets,
        or all Kubelets have disappeared from service discovery.
  # Warning: a kubelet has reported more than 100 running pods for 10 minutes.
  - alert: K8SKubeletTooManyPods
    expr: >-
      kubelet_running_pod_count > 100
    for: 10m
    labels:
      severity: warning
    annotations:
      summary: Kubelet is close to pod limit
      description: >-
        Kubelet {{$labels.instance}} is running {{$value}} pods,
        close to the limit of 110