Merge pull request #2 from brancz/k8s-components

monitor and alert on k8s components
Frederic Branczyk
2016-11-02 11:02:56 +01:00
committed by GitHub
8 changed files with 864 additions and 8 deletions

@@ -0,0 +1,398 @@
### Container resources ###
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_spec_memory_limit_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:spec_cpu_shares =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_spec_cpu_shares{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:cpu_usage:rate =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
irate(
container_cpu_usage_seconds_total{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_usage:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_usage_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_working_set:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_working_set_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_rss:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_rss{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_cache:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_cache{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:disk_usage:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_disk_usage_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_pagefaults:rate =
sum by (cluster,namespace,controller,pod_name,container_name,scope,type) (
label_replace(
irate(
container_memory_failures_total{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_oom:rate =
sum by (cluster,namespace,controller,pod_name,container_name,scope,type) (
label_replace(
irate(
container_memory_failcnt{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
### Cluster resources ###
cluster:memory_allocation:percent =
100 * sum by (cluster) (
container_spec_memory_limit_bytes{pod_name!=""}
) / sum by (cluster) (
machine_memory_bytes
)
cluster:memory_used:percent =
100 * sum by (cluster) (
container_memory_usage_bytes{pod_name!=""}
) / sum by (cluster) (
machine_memory_bytes
)
cluster:cpu_allocation:percent =
100 * sum by (cluster) (
container_spec_cpu_shares{pod_name!=""}
) / sum by (cluster) (
container_spec_cpu_shares{id="/"} * on(cluster,instance) machine_cpu_cores
)
cluster:node_cpu_use:percent =
100 * sum by (cluster) (
rate(node_cpu{mode!="idle"}[5m])
) / sum by (cluster) (
machine_cpu_cores
)
### API latency ###
# Raw metrics are in microseconds. Convert to seconds.
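# e.g. a raw bucket boundary of 250000 microseconds becomes 0.25 seconds after division by 1e6.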
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(
0.99,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(
0.9,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(
0.5,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
### Scheduling latency ###
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
ALERT K8SNodeDown
IF up{job="kubelets"} == 0
FOR 1h
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Kubelet cannot be scraped",
description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour",
}
ALERT K8SNodeNotReady
IF kube_node_status_ready{condition="true"} == 0
FOR 1h
LABELS {
service = "k8s",
severity = "warning",
}
ANNOTATIONS {
summary = "Node status is NotReady",
description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour",
}
ALERT K8SManyNodesNotReady
IF
count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count by (cluster) (kube_node_status_ready{condition="true"} == 0)
/
count by (cluster) (kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.",
}
ALERT K8SKubeletNodeExporterDown
IF up{job="node-exporter"} == 0
FOR 15m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Kubelet node_exporter cannot be scraped",
description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour.",
}
ALERT K8SKubeletDown
IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
FOR 1h
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "Many Kubelets cannot be scraped",
description = "Prometheus failed to scrape more than 10% of kubelets, or all Kubelets have disappeared from service discovery.",
}
ALERT K8SApiserverDown
IF up{job="kubernetes"} == 0
FOR 15m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "API server unreachable",
description = "An API server could not be scraped.",
}
# Disable for non-HA Kubernetes setups.
ALERT K8SApiserverDown
IF absent({job="kubernetes"}) or count by(cluster) (up{job="kubernetes"} == 1) < 2
FOR 5m
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "API server unreachable",
description = "Prometheus failed to scrape multiple API servers, or all API servers have disappeared from service discovery.",
}
ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
FOR 5m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
}
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
FOR 5m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
}
ALERT K8SMoreThanOneController
IF count by (job,cluster) (up{job=~"kube-scheduler|kube-controller-manager"}) > 1
FOR 5m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "More than one controller node is active",
description = "There is more than one {{ $labels.job }} managing the cluster. Cluster behaviour is undefined.",
}
ALERT K8SConntrackTableFull
IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50
FOR 10m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Number of tracked connections is near the limit",
description = "The nf_conntrack table is {{ $value }}% full.",
}
ALERT K8SConntrackTableFull
IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 90
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "Number of tracked connections is near the limit",
description = "The nf_conntrack table is {{ $value }}% full.",
}
# To catch the conntrack sysctl de-tuning when it happens
ALERT K8SConntrackTuningMissing
IF node_nf_conntrack_udp_timeout > 10
FOR 10m
LABELS {
service = "k8s",
severity = "warning",
}
ANNOTATIONS {
summary = "Node does not have the correct conntrack tunings",
description = "Nodes keep un-setting the correct tunings, investigate when it happens.",
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
FOR 10m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "{{ $labels.job }} has too many open file descriptors",
description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.",
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
FOR 10m
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "{{ $labels.job }} has too many open file descriptors",
description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.",
}
# Some verbs excluded because they are expected to be long-lasting:
# WATCHLIST is long-poll, CONNECT is `kubectl exec`.
ALERT K8SApiServerLatency
IF histogram_quantile(
0.99,
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST"})
) / 1e6 > 1.0
FOR 10m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Kubernetes apiserver latency is high",
description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.",
}
ALERT K8SApiServerEtcdAccessLatency
IF etcd_request_latencies_summary{quantile="0.99"} / 1e6 > 1.0
FOR 15m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Access to etcd is slow",
description = "99th percentile latency for apiserver to access etcd is higher than 1s.",
}
ALERT K8SKubeletTooManyPods
IF kubelet_running_pod_count > 100
LABELS {
service = "k8s",
severity = "warning",
}
ANNOTATIONS {
summary = "Kubelet is close to pod limit",
description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110",
}
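A note on the rules file above: the recording-rule names follow the Prometheus level:metric:operations naming convention, so the aggregation level (cluster, namespace, controller, pod_name, container_name) is readable from the name itself. An illustrative query against the pre-aggregated series (not part of this commit) could be:

topk(5, sum by (namespace, controller) (cluster_namespace_controller_pod_container:memory_usage:bytes))

which lists the five controllers consuming the most container memory.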

@@ -1,7 +1,7 @@
#!/bin/bash
# Generate Alert Rules ConfigMap
-kubectl create configmap --dry-run=true prometheus-k8s-alerts --from-file=assets/alerts/ -oyaml > manifests/prometheus/prometheus-k8s-alerts.yaml
+kubectl create configmap --dry-run=true prometheus-k8s-rules --from-file=assets/alerts/ -oyaml > manifests/prometheus/prometheus-k8s-rules.yaml
# Generate Dashboard ConfigMap
kubectl create configmap --dry-run=true grafana-dashboards --from-file=assets/grafana/ -oyaml > manifests/grafana/grafana-cm.yaml
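Since both configmaps are generated with --dry-run=true, nothing is created in the cluster at generation time; the rendered manifests are committed and applied separately, for example (illustrative):

kubectl apply -f manifests/prometheus/prometheus-k8s-rules.yaml
kubectl apply -f manifests/grafana/grafana-cm.yaml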

@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
name: kube-controller-manager-prometheus-discovery
labels:
k8s-app: kube-controller-manager
annotations:
prometheus.io/scrape: 'true'
spec:
selector:
k8s-app: kube-controller-manager
type: ClusterIP
clusterIP: None
ports:
- name: prometheus
port: 10252
targetPort: 10252
protocol: TCP

@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
name: kube-scheduler-prometheus-discovery
labels:
k8s-app: kube-scheduler
annotations:
prometheus.io/scrape: 'true'
spec:
selector:
k8s-app: kube-scheduler
type: ClusterIP
clusterIP: None
ports:
- name: prometheus
port: 10251
targetPort: 10251
protocol: TCP
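Both discovery Services (this one and the kube-controller-manager one above) are headless (clusterIP: None), so Prometheus' endpoints role discovers the component pods directly rather than a load-balanced virtual IP. Once scraping works, target health can be checked with a query along these lines (illustrative):

count by (job) (up{job=~"kube-scheduler|kube-controller-manager"} == 1)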

@@ -1,7 +0,0 @@
apiVersion: v1
data:
.gitkeep: ""
kind: ConfigMap
metadata:
creationTimestamp: null
name: prometheus-k8s-alerts

@@ -50,3 +50,27 @@ data:
regex: kubernetes
target_label: __scheme__
replacement: https
# Scrapes the endpoints of the Kubernetes components exposed via the
# kube-*-prometheus-discovery services, which we consider part of a default setup.
- job_name: kube-components
scrape_interval: 20s
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: replace
source_labels: [__meta_kubernetes_service_name]
target_label: job
regex: "kube-(.*)-prometheus-discovery"
replacement: "kube-${1}"
- action: keep
source_labels: [__meta_kubernetes_service_name]
regex: "kube-(.*)-prometheus-discovery"
- action: keep
source_labels: [__meta_kubernetes_endpoint_port_name]
regex: "prometheus"

@@ -0,0 +1,405 @@
apiVersion: v1
data:
kubernetes.rules: |+
### Container resources ###
cluster_namespace_controller_pod_container:spec_memory_limit_bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_spec_memory_limit_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:spec_cpu_shares =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_spec_cpu_shares{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:cpu_usage:rate =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
irate(
container_cpu_usage_seconds_total{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_usage:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_usage_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_working_set:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_working_set_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_rss:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_rss{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_cache:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_memory_cache{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:disk_usage:bytes =
sum by (cluster,namespace,controller,pod_name,container_name) (
label_replace(
container_disk_usage_bytes{container_name!=""},
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_pagefaults:rate =
sum by (cluster,namespace,controller,pod_name,container_name,scope,type) (
label_replace(
irate(
container_memory_failures_total{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
cluster_namespace_controller_pod_container:memory_oom:rate =
sum by (cluster,namespace,controller,pod_name,container_name,scope,type) (
label_replace(
irate(
container_memory_failcnt{container_name!=""}[5m]
),
"controller", "$1",
"pod_name", "^(.*)-[a-z0-9]+"
)
)
### Cluster resources ###
cluster:memory_allocation:percent =
100 * sum by (cluster) (
container_spec_memory_limit_bytes{pod_name!=""}
) / sum by (cluster) (
machine_memory_bytes
)
cluster:memory_used:percent =
100 * sum by (cluster) (
container_memory_usage_bytes{pod_name!=""}
) / sum by (cluster) (
machine_memory_bytes
)
cluster:cpu_allocation:percent =
100 * sum by (cluster) (
container_spec_cpu_shares{pod_name!=""}
) / sum by (cluster) (
container_spec_cpu_shares{id="/"} * on(cluster,instance) machine_cpu_cores
)
cluster:node_cpu_use:percent =
100 * sum by (cluster) (
rate(node_cpu{mode!="idle"}[5m])
) / sum by (cluster) (
machine_cpu_cores
)
### API latency ###
# Raw metrics are in microseconds. Convert to seconds.
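# e.g. a raw bucket boundary of 250000 microseconds becomes 0.25 seconds after division by 1e6.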
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(
0.99,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(
0.9,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
cluster_resource_verb:apiserver_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(
0.5,
sum by(le,cluster,job,resource,verb) (apiserver_request_latencies_bucket)
) / 1e6
### Scheduling latency ###
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_e2e_scheduling_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_e2e_scheduling_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_scheduling_algorithm_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_scheduling_algorithm_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.99"} =
histogram_quantile(0.99,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.9"} =
histogram_quantile(0.9,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
cluster:scheduler_binding_latency:quantile_seconds{quantile="0.5"} =
histogram_quantile(0.5,sum by (le,cluster) (scheduler_binding_latency_microseconds_bucket)) / 1e6
ALERT K8SNodeDown
IF up{job="kubelets"} == 0
FOR 1h
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Kubelet cannot be scraped",
description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour",
}
ALERT K8SNodeNotReady
IF kube_node_status_ready{condition="true"} == 0
FOR 1h
LABELS {
service = "k8s",
severity = "warning",
}
ANNOTATIONS {
summary = "Node status is NotReady",
description = "The Kubelet on {{ $labels.node }} has not checked in with the API, or has set itself to NotReady, for more than an hour",
}
ALERT K8SManyNodesNotReady
IF
count by (cluster) (kube_node_status_ready{condition="true"} == 0) > 1
AND
(
count by (cluster) (kube_node_status_ready{condition="true"} == 0)
/
count by (cluster) (kube_node_status_ready{condition="true"})
) > 0.2
FOR 1m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "Many K8s nodes are Not Ready",
description = "{{ $value }} K8s nodes (more than 10% of cluster {{ $labels.cluster }}) are in the NotReady state.",
}
ALERT K8SKubeletNodeExporterDown
IF up{job="node-exporter"} == 0
FOR 15m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Kubelet node_exporter cannot be scraped",
description = "Prometheus could not scrape a {{ $labels.job }} for more than one hour.",
}
ALERT K8SKubeletDown
IF absent(up{job="kubelets"}) or count by (cluster) (up{job="kubelets"} == 0) / count by (cluster) (up{job="kubelets"}) > 0.1
FOR 1h
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "Many Kubelets cannot be scraped",
description = "Prometheus failed to scrape more than 10% of kubelets, or all Kubelets have disappeared from service discovery.",
}
ALERT K8SApiserverDown
IF up{job="kubernetes"} == 0
FOR 15m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "API server unreachable",
description = "An API server could not be scraped.",
}
# Disable for non-HA Kubernetes setups.
ALERT K8SApiserverDown
IF absent({job="kubernetes"}) or count by(cluster) (up{job="kubernetes"} == 1) < 2
FOR 5m
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "API server unreachable",
description = "Prometheus failed to scrape multiple API servers, or all API servers have disappeared from service discovery.",
}
ALERT K8SSchedulerDown
IF absent(up{job="kube-scheduler"}) or (count by(cluster) (up{job="kube-scheduler"} == 1) == 0)
FOR 5m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "Scheduler is down",
description = "There is no running K8S scheduler. New pods are not being assigned to nodes.",
}
ALERT K8SControllerManagerDown
IF absent(up{job="kube-controller-manager"}) or (count by(cluster) (up{job="kube-controller-manager"} == 1) == 0)
FOR 5m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "Controller manager is down",
description = "There is no running K8S controller manager. Deployments and replication controllers are not making progress.",
}
ALERT K8SMoreThanOneController
IF count by (job,cluster) (up{job=~"kube-scheduler|kube-controller-manager"}) > 1
FOR 5m
LABELS {
service = "k8s",
severity = "critical",
}
ANNOTATIONS {
summary = "More than one controller node is active",
description = "There is more than one {{ $labels.job }} managing the cluster. Cluster behaviour is undefined.",
}
ALERT K8SConntrackTableFull
IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50
FOR 10m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Number of tracked connections is near the limit",
description = "The nf_conntrack table is {{ $value }}% full.",
}
ALERT K8SConntrackTableFull
IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 90
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "Number of tracked connections is near the limit",
description = "The nf_conntrack table is {{ $value }}% full.",
}
# To catch the conntrack sysctl de-tuning when it happens
ALERT K8SConntrackTuningMissing
IF node_nf_conntrack_udp_timeout > 10
FOR 10m
LABELS {
service = "k8s",
severity = "warning",
}
ANNOTATIONS {
summary = "Node does not have the correct conntrack tunings",
description = "Nodes keep un-setting the correct tunings, investigate when it happens.",
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 50
FOR 10m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "{{ $labels.job }} has too many open file descriptors",
description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.",
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds{job=~"kubelets|kubernetes"} / process_max_fds > 80
FOR 10m
LABELS {
service = "k8s",
severity = "critical"
}
ANNOTATIONS {
summary = "{{ $labels.job }} has too many open file descriptors",
description = "{{ $labels.node }} is using {{ $value }}% of the available file/socket descriptors.",
}
# Some verbs excluded because they are expected to be long-lasting:
# WATCHLIST is long-poll, CONNECT is `kubectl exec`.
ALERT K8SApiServerLatency
IF histogram_quantile(
0.99,
sum without (instance,node,resource) (apiserver_request_latencies_bucket{verb!~"CONNECT|WATCHLIST"})
) / 1e6 > 1.0
FOR 10m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Kubernetes apiserver latency is high",
description = "99th percentile Latency for {{ $labels.verb }} requests to the kube-apiserver is higher than 1s.",
}
ALERT K8SApiServerEtcdAccessLatency
IF etcd_request_latencies_summary{quantile="0.99"} / 1e6 > 1.0
FOR 15m
LABELS {
service = "k8s",
severity = "warning"
}
ANNOTATIONS {
summary = "Access to etcd is slow",
description = "99th percentile latency for apiserver to access etcd is higher than 1s.",
}
ALERT K8SKubeletTooManyPods
IF kubelet_running_pod_count > 100
LABELS {
service = "k8s",
severity = "warning",
}
ANNOTATIONS {
summary = "Kubelet is close to pod limit",
description = "Kubelet {{$labels.instance}} is running {{$value}} pods, close to the limit of 110",
}
kind: ConfigMap
metadata:
creationTimestamp: null
name: prometheus-k8s-rules