Add triggered_total metric to alertmanager controller
Update client_golang for wrappable registerer
This commit is contained in:
@@ -10,7 +10,7 @@
|
|||||||
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
|
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
|
||||||
},
|
},
|
||||||
expr: |||
|
expr: |||
|
||||||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas{%(prometheusOperatorSelector)s}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
|
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
'for': '5m',
|
'for': '5m',
|
||||||
labels: {
|
labels: {
|
||||||
|
@@ -5,28 +5,15 @@
|
|||||||
name: 'prometheus-operator',
|
name: 'prometheus-operator',
|
||||||
rules: [
|
rules: [
|
||||||
{
|
{
|
||||||
alert: 'PrometheusOperatorAlertmanagerReconcileErrors',
|
alert: 'PrometheusOperatorReconcileErrors',
|
||||||
expr: |||
|
expr: |||
|
||||||
rate(prometheus_operator_alertmanager_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
||||||
||| % $._config,
|
||| % $._config,
|
||||||
labels: {
|
labels: {
|
||||||
severity: 'warning',
|
severity: 'warning',
|
||||||
},
|
},
|
||||||
annotations: {
|
annotations: {
|
||||||
message: 'Errors while reconciling Alertmanager in {{ $labels.namespace }} Namespace.',
|
message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
|
||||||
},
|
|
||||||
'for': '10m',
|
|
||||||
},
|
|
||||||
{
|
|
||||||
alert: 'PrometheusOperatorPrometheusReconcileErrors',
|
|
||||||
expr: |||
|
|
||||||
rate(prometheus_operator_prometheus_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
|
|
||||||
||| % $._config,
|
|
||||||
labels: {
|
|
||||||
severity: 'warning',
|
|
||||||
},
|
|
||||||
annotations: {
|
|
||||||
message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
|
|
||||||
},
|
},
|
||||||
'for': '10m',
|
'for': '10m',
|
||||||
},
|
},
|
||||||
|
@@ -8,7 +8,7 @@
|
|||||||
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "049c48c931bfb3cd72efd313b7a47d2244456db0"
|
"version": "d874b5bc21649dd9d07ab42dd3bdea515038953e"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ksonnet",
|
"name": "ksonnet",
|
||||||
|
@@ -947,7 +947,7 @@ spec:
|
|||||||
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
|
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
|
||||||
are out of sync.
|
are out of sync.
|
||||||
expr: |
|
expr: |
|
||||||
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_alertmanager_spec_replicas{job="prometheus-operator"}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
|
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator"}, "service", "alertmanager-$1", "alertmanager", "(.*)") != 1
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@@ -1099,20 +1099,12 @@ spec:
|
|||||||
severity: warning
|
severity: warning
|
||||||
- name: prometheus-operator
|
- name: prometheus-operator
|
||||||
rules:
|
rules:
|
||||||
- alert: PrometheusOperatorAlertmanagerReconcileErrors
|
- alert: PrometheusOperatorReconcileErrors
|
||||||
annotations:
|
annotations:
|
||||||
message: Errors while reconciling Alertmanager in {{ $labels.namespace }}
|
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
|
||||||
Namespace.
|
}} Namespace.
|
||||||
expr: |
|
expr: |
|
||||||
rate(prometheus_operator_alertmanager_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1
|
rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: PrometheusOperatorPrometheusReconcileErrors
|
|
||||||
annotations:
|
|
||||||
message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
|
|
||||||
expr: |
|
|
||||||
rate(prometheus_operator_prometheus_reconcile_errors_total{job="prometheus-operator"}[5m]) > 0.1
|
|
||||||
for: 10m
|
for: 10m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
Reference in New Issue
Block a user