Add summary to Alertmanager rules where missing - updated according to guidelines
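In practice each touched rule keeps its templated `description` and gains a short, static `summary`. A minimal sketch of the resulting rule shape, assuming the usual group/rule layout (the group name and the `severity` value below are illustrative, not taken from the hunks; the expression, `for`, and annotation text are from the diff):

    groups:
    - name: alertmanager.rules          # assumed group name, for illustration
      rules:
      - alert: AlertmanagerFailedReload
        expr: alertmanager_config_last_reload_successful == 0
        for: 10m
        labels:
          severity: warning             # assumed label value, not shown in the hunk
        annotations:
          description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod }}.
          summary: Alertmanager's configuration reload failed   # the newly added, template-free one-liner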
@@ -11,6 +11,7 @@ groups:
annotations:
description: The configuration of the instances of the Alertmanager cluster
`{{$labels.service}}` are out of sync.
+ summary: Configuration out of sync
- alert: AlertmanagerDownOrMissing
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
"alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
@@ -20,6 +21,7 @@ groups:
annotations:
description: An unexpected number of Alertmanagers are scraped or Alertmanagers
disappeared from discovery.
+ summary: Alertmanager down or missing
- alert: AlertmanagerFailedReload
expr: alertmanager_config_last_reload_successful == 0
for: 10m
@@ -28,3 +30,4 @@ groups:
annotations:
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
+ summary: Alertmanager's configuration reload failed

@@ -26,6 +26,7 @@ groups:
severity: warning
annotations:
description: Prometheus failed to scrape {{ $value }}% of kubelets.
+ summary: Prometheus failed to scrape
- alert: K8SKubeletDown
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
* 100 > 10

@@ -51,6 +51,7 @@ groups:
annotations:
description: the API server has a 99th percentile latency of {{ $value }} seconds
for {{$labels.verb}} {{$labels.resource}}
+ summary: API server high latency
- alert: APIServerLatencyHigh
expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
> 4
@@ -60,6 +61,7 @@ groups:
annotations:
description: the API server has a 99th percentile latency of {{ $value }} seconds
for {{$labels.verb}} {{$labels.resource}}
+ summary: API server high latency
- alert: APIServerErrorsHigh
expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
* 100 > 2
@@ -68,6 +70,7 @@ groups:
severity: warning
annotations:
description: API server returns errors for {{ $value }}% of requests
+ summary: API server request errors
- alert: APIServerErrorsHigh
expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
* 100 > 5
@@ -84,12 +87,14 @@ groups:
annotations:
description: No API servers are reachable or all have disappeared from service
discovery
+ summary: No API servers are reachable

- alert: K8sCertificateExpirationNotice
labels:
severity: warning
annotations:
description: Kubernetes API Certificate is expiring soon (less than 7 days)
+ summary: Kubernetes API Certificate is expiring soon
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0

- alert: K8sCertificateExpirationNotice
@@ -97,4 +102,5 @@ groups:
severity: critical
annotations:
description: Kubernetes API Certificate is expiring in less than 1 day
+ summary: Kubernetes API Certificate is expiring
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0

@@ -26,6 +26,7 @@ groups:
annotations:
description: Prometheus could not scrape a node-exporter for more than 10m,
or node-exporters have disappeared from discovery
+ summary: Prometheus could not scrape a node-exporter
- alert: NodeDiskRunningFull
expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0
for: 30m
@@ -42,3 +43,4 @@ groups:
annotations:
description: device {{$labels.device}} on node {{$labels.instance}} is running
full within the next 2 hours (mounted at {{$labels.mountpoint}})
+ summary: Node disk is running full

@@ -8,6 +8,7 @@ groups:
severity: warning
annotations:
description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
+ summary: Reloading Prometheus' configuration failed

- alert: PrometheusNotificationQueueRunningFull
expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
@@ -17,6 +18,7 @@ groups:
annotations:
description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
$labels.pod}}
+ summary: Prometheus' alert notification queue is running full

- alert: PrometheusErrorSendingAlerts
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -27,6 +29,7 @@ groups:
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+ summary: Errors while sending alert from Prometheus

- alert: PrometheusErrorSendingAlerts
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -37,6 +40,7 @@ groups:
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+ summary: Errors while sending alerts from Prometheus

- alert: PrometheusNotConnectedToAlertmanagers
expr: prometheus_notifications_alertmanagers_discovered < 1
@@ -46,6 +50,7 @@ groups:
annotations:
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
to any Alertmanagers
+ summary: Prometheus is not connected to any Alertmanagers

- alert: PrometheusTSDBReloadsFailing
expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0

@@ -20,6 +20,7 @@ data:
annotations:
description: The configuration of the instances of the Alertmanager cluster
`{{$labels.service}}` are out of sync.
+ summary: Configuration out of sync
- alert: AlertmanagerDownOrMissing
expr: label_replace(prometheus_operator_alertmanager_spec_replicas, "job", "alertmanager-$1",
"alertmanager", "(.*)") / ON(job) GROUP_RIGHT() sum(up) BY (job) != 1
@@ -29,6 +30,7 @@ data:
annotations:
description: An unexpected number of Alertmanagers are scraped or Alertmanagers
disappeared from discovery.
+ summary: Alertmanager down or missing
- alert: AlertmanagerFailedReload
expr: alertmanager_config_last_reload_successful == 0
for: 10m
@@ -37,6 +39,7 @@ data:
annotations:
description: Reloading Alertmanager's configuration has failed for {{ $labels.namespace
}}/{{ $labels.pod}}.
+ summary: Alertmanager's configuration reload failed
etcd3.rules.yaml: |+
groups:
- name: ./etcd3.rules
@@ -363,6 +366,7 @@ data:
severity: warning
annotations:
description: Prometheus failed to scrape {{ $value }}% of kubelets.
+ summary: Prometheus failed to scrape
- alert: K8SKubeletDown
expr: (absent(up{job="kubelet"} == 1) or count(up{job="kubelet"} == 0) / count(up{job="kubelet"}))
* 100 > 10
@@ -436,6 +440,7 @@ data:
annotations:
description: the API server has a 99th percentile latency of {{ $value }} seconds
for {{$labels.verb}} {{$labels.resource}}
+ summary: API server high latency
- alert: APIServerLatencyHigh
expr: apiserver_latency_seconds:quantile{quantile="0.99",subresource!="log",verb!~"^(?:WATCH|WATCHLIST|PROXY|CONNECT)$"}
> 4
@@ -445,6 +450,7 @@ data:
annotations:
description: the API server has a 99th percentile latency of {{ $value }} seconds
for {{$labels.verb}} {{$labels.resource}}
+ summary: API server high latency
- alert: APIServerErrorsHigh
expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
* 100 > 2
@@ -453,6 +459,7 @@ data:
severity: warning
annotations:
description: API server returns errors for {{ $value }}% of requests
+ summary: API server request errors
- alert: APIServerErrorsHigh
expr: rate(apiserver_request_count{code=~"^(?:5..)$"}[5m]) / rate(apiserver_request_count[5m])
* 100 > 5
@@ -469,12 +476,14 @@ data:
annotations:
description: No API servers are reachable or all have disappeared from service
discovery
+ summary: No API servers are reachable

- alert: K8sCertificateExpirationNotice
labels:
severity: warning
annotations:
description: Kubernetes API Certificate is expiring soon (less than 7 days)
+ summary: Kubernetes API Certificate is expiring soon
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="604800"}) > 0

- alert: K8sCertificateExpirationNotice
@@ -482,6 +491,7 @@ data:
severity: critical
annotations:
description: Kubernetes API Certificate is expiring in less than 1 day
+ summary: Kubernetes API Certificate is expiring
expr: sum(apiserver_client_certificate_expiration_seconds_bucket{le="86400"}) > 0
node.rules.yaml: |+
groups:
@@ -512,6 +522,7 @@ data:
annotations:
description: Prometheus could not scrape a node-exporter for more than 10m,
or node-exporters have disappeared from discovery
+ summary: Prometheus could not scrape a node-exporter
- alert: NodeDiskRunningFull
expr: predict_linear(node_filesystem_free[6h], 3600 * 24) < 0
for: 30m
@@ -528,6 +539,7 @@ data:
annotations:
description: device {{$labels.device}} on node {{$labels.instance}} is running
full within the next 2 hours (mounted at {{$labels.mountpoint}})
+ summary: Node disk is running full
prometheus.rules.yaml: |+
groups:
- name: prometheus.rules
@@ -539,6 +551,7 @@ data:
severity: warning
annotations:
description: Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}
+ summary: Reloading Prometheus' configuration failed

- alert: PrometheusNotificationQueueRunningFull
expr: predict_linear(prometheus_notifications_queue_length[5m], 60 * 30) > prometheus_notifications_queue_capacity
@@ -548,6 +561,7 @@ data:
annotations:
description: Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{
$labels.pod}}
+ summary: Prometheus' alert notification queue is running full

- alert: PrometheusErrorSendingAlerts
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -558,6 +572,7 @@ data:
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+ summary: Errors while sending alert from Prometheus

- alert: PrometheusErrorSendingAlerts
expr: rate(prometheus_notifications_errors_total[5m]) / rate(prometheus_notifications_sent_total[5m])
@@ -568,6 +583,7 @@ data:
annotations:
description: Errors while sending alerts from Prometheus {{$labels.namespace}}/{{
$labels.pod}} to Alertmanager {{$labels.Alertmanager}}
+ summary: Errors while sending alerts from Prometheus

- alert: PrometheusNotConnectedToAlertmanagers
expr: prometheus_notifications_alertmanagers_discovered < 1
@@ -577,6 +593,7 @@ data:
annotations:
description: Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected
to any Alertmanagers
+ summary: Prometheus is not connected to any Alertmanagers

- alert: PrometheusTSDBReloadsFailing
expr: increase(prometheus_tsdb_reloads_failures_total[2h]) > 0
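For context on what the short summaries buy: Alertmanager notification templates can surface `summary` as the terse headline while keeping the longer templated `description` for the body. A hedged sketch of a receiver fragment doing that (the receiver name and channel are placeholders, a Slack webhook URL is assumed to be configured globally; only the `summary`/`description` annotation names come from this commit):

    receivers:
    - name: example-slack                     # placeholder receiver name, for illustration only
      slack_configs:
      - channel: '#alerts'                    # placeholder channel
        title: '{{ .CommonAnnotations.summary }}'
        text: '{{ range .Alerts }}{{ .Annotations.description }}{{ "\n" }}{{ end }}'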