# Listing metadata from the file viewer (not part of the rules): 45 lines, 1.7 KiB, YAML
# Alerting rules that watch the Prometheus server itself: configuration
# reloads and delivery of alert notifications to Alertmanager.
# Every rule must hold for 10 minutes before it fires.
groups:
  - name: prometheus.rules
    rules:
      # The last configuration reload is reported as unsuccessful.
      - alert: PrometheusConfigReloadFailed
        expr: prometheus_config_last_reload_successful == 0
        for: 10m
        labels:
          severity: warning
        annotations:
          description: >-
            Reloading Prometheus' configuration has failed for
            {{$labels.namespace}}/{{$labels.pod}}

      # Linear extrapolation of the last 5m of queue length, 30 minutes
      # ahead (60 * 30 seconds), exceeds the queue capacity.
      - alert: PrometheusNotificationQueueRunningFull
        expr: >-
          predict_linear(prometheus_notifications_queue_length[5m], 60 * 30)
          > prometheus_notifications_queue_capacity
        for: 10m
        labels:
          severity: warning
        annotations:
          description: >-
            Prometheus' alert notification queue is running full for
            {{$labels.namespace}}/{{ $labels.pod}}

      # More than 1% of notifications sent over the last 5m failed.
      # The target is read from the lowercase `alertmanager` label; label
      # names are case-sensitive, so `Alertmanager` would render empty.
      - alert: PrometheusErrorSendingAlerts
        expr: >-
          rate(prometheus_notifications_errors_total[5m])
          / rate(prometheus_notifications_sent_total[5m])
          > 0.01
        for: 10m
        labels:
          severity: warning
        annotations:
          description: >-
            Errors while sending alerts from Prometheus
            {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager
            {{$labels.alertmanager}}

      # Same ratio as the warning-level rule above, escalated to critical
      # once more than 3% of notifications fail. The shared alert name is
      # intentional; the two rules differ by threshold and severity label.
      - alert: PrometheusErrorSendingAlerts
        expr: >-
          rate(prometheus_notifications_errors_total[5m])
          / rate(prometheus_notifications_sent_total[5m])
          > 0.03
        for: 10m
        labels:
          severity: critical
        annotations:
          description: >-
            Errors while sending alerts from Prometheus
            {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager
            {{$labels.alertmanager}}

      # No Alertmanager instance has been discovered.
      - alert: PrometheusNotConnectedToAlertmanagers
        expr: prometheus_notifications_alertmanagers_discovered < 1
        for: 10m
        labels:
          severity: warning
        annotations:
          description: >-
            Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not
            connected to any Alertmanagers