@@ -1902,21 +1902,6 @@ spec:
       for: 15m
       labels:
         severity: warning
-    - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
-      annotations:
-        description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
-        summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
-      expr: |
-        min without(alertmanager) (
-          rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
-        /
-          rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
-        )
-        * 100
-        > 3
-      for: 15m
-      labels:
-        severity: critical
     - alert: PrometheusNotConnectedToAlertmanagers
       annotations:
         description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
@@ -1951,7 +1936,15 @@ spec:
         description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
         summary: Prometheus is not ingesting samples.
       expr: |
-        rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
+        (
+          rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
+        and
+          (
+            sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="prometheus-k8s",namespace="monitoring"}) > 0
+          or
+            sum without(rule_group) (prometheus_rule_group_rules{job="prometheus-k8s",namespace="monitoring"}) > 0
+          )
+        )
       for: 10m
       labels:
         severity: warning
@@ -2001,7 +1994,7 @@ spec:
         # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
         (
           max_over_time(prometheus_remote_storage_highest_timestamp_in_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
-        - on(job, instance) group_right
+        - ignoring(remote_name, url) group_right
          max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="prometheus-k8s",namespace="monitoring"}[5m])
         )
         > 120
@@ -2050,6 +2043,21 @@ spec:
       for: 15m
       labels:
         severity: warning
+    - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
+      annotations:
+        description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
+        summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
+      expr: |
+        min without (alertmanager) (
+          rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+        /
+          rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
+        )
+        * 100
+        > 3
+      for: 15m
+      labels:
+        severity: critical
   - name: general.rules
     rules:
     - alert: TargetDown