# Source listing metadata: 59 lines, 2.4 KiB, YAML
# Prometheus rule group for the Kubernetes scheduler.
#
# Recording rules: for each of three scheduler latency histograms (end-to-end
# scheduling, scheduling algorithm, binding) the 0.99 / 0.9 / 0.5 quantiles are
# recorded per `cluster`. The source histograms are named in microseconds, so
# each expression divides by 1e+06 to record seconds.
#
# The quantiles are computed over `rate(...[5m])` of the bucket counters rather
# than over the raw cumulative counters, so they describe the last five minutes
# instead of the whole process lifetime and are not skewed by counter resets.
#
# Alerting rule: K8SSchedulerDown fires when no `up{job="kube-scheduler"}`
# series with value 1 has existed for 5 minutes.
groups:
  - name: kube-scheduler.rules
    rules:
      # --- End-to-end scheduling latency ---------------------------------
      - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.99,
          sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.99"
      - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.9,
          sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.9"
      - record: cluster:scheduler_e2e_scheduling_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.5,
          sum(rate(scheduler_e2e_scheduling_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.5"

      # --- Scheduling algorithm latency ----------------------------------
      - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.99,
          sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.99"
      - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.9,
          sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.9"
      - record: cluster:scheduler_scheduling_algorithm_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.5,
          sum(rate(scheduler_scheduling_algorithm_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.5"

      # --- Binding latency -----------------------------------------------
      - record: cluster:scheduler_binding_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.99,
          sum(rate(scheduler_binding_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.99"
      - record: cluster:scheduler_binding_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.9,
          sum(rate(scheduler_binding_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.9"
      - record: cluster:scheduler_binding_latency_seconds:quantile
        expr: >-
          histogram_quantile(0.5,
          sum(rate(scheduler_binding_latency_microseconds_bucket[5m]))
          by (le, cluster)) / 1e+06
        labels:
          quantile: "0.5"

      # --- Availability alert --------------------------------------------
      # absent() returns a value only when the inner selector matches no
      # series, i.e. when no kube-scheduler target is reporting up == 1.
      - alert: K8SSchedulerDown
        expr: absent(up{job="kube-scheduler"} == 1)
        for: 5m
        labels:
          severity: critical
        annotations:
          description: >-
            There is no running K8S scheduler. New pods are not being assigned
            to nodes.
          runbook: https://coreos.com/tectonic/docs/latest/troubleshooting/controller-recovery.html#recovering-a-scheduler
          summary: Scheduler is down