Cut release-0.7

Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
This commit is contained in:
Damien Grisonnet
2020-12-09 11:37:59 +01:00
parent 0cfe5bba45
commit 6a05efd636
13 changed files with 63 additions and 54 deletions

View File

@@ -1902,21 +1902,6 @@ spec:
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without(alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
- alert: PrometheusNotConnectedToAlertmanagers
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
@@ -1951,7 +1936,15 @@ spec:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
summary: Prometheus is not ingesting samples.
expr: |
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
(
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
and
(
sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
or
sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
)
)
for: 10m
labels:
severity: warning
@@ -2001,7 +1994,7 @@ spec:
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
(
max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
- on(job, instance) group_right
- ignoring(remote_name, url) group_right
max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
)
> 120
@@ -2050,6 +2043,21 @@ spec:
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: |
min without (alertmanager) (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
/
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
)
* 100
> 3
for: 15m
labels:
severity: critical
- name: general.rules
rules:
- alert: TargetDown