From 579dbf34b159c1709f1b63d593aba90925fa72ea Mon Sep 17 00:00:00 2001 From: Lili Cosic Date: Fri, 16 Aug 2019 15:16:13 +0200 Subject: [PATCH 1/3] jsonnet/kube-prometheus: Prevent many-to-many matching If there is more than one prometheus-operator pod, which happens briefly when we delete the prometheus-operator pod, we can see many-to-many matching errors. This whitelists the labels used for matching and excludes the pod label. --- jsonnet/kube-prometheus/alerts/alertmanager.libsonnet | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet index bf58862d..bda69d00 100644 --- a/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet +++ b/jsonnet/kube-prometheus/alerts/alertmanager.libsonnet @@ -10,7 +10,7 @@ message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.', }, expr: ||| - count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 + count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 ||| % $._config, 'for': '5m', labels: { @@ -31,8 +31,8 @@ }, }, { - alert:'AlertmanagerMembersInconsistent', - annotations:{ + alert: 'AlertmanagerMembersInconsistent', + annotations: { message: 'Alertmanager has not found all other members of the cluster.', }, expr: ||| From 08e093232eb11e9db281556664d7eb94cf30972f Mon Sep 17 00:00:00 2001 From: Lili Cosic Date: Tue, 27 Aug 2019 17:46:51 +0200 Subject: [PATCH 2/3] 
jsonnetfile.lock.json: jb update --- jsonnetfile.lock.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 48d91237..033053a6 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -8,7 +8,7 @@ "subdir": "jsonnet/kube-prometheus" } }, - "version": "2fde1a442df6b6b5851c47b3bc5fb537090c570c" + "version": "579dbf34b159c1709f1b63d593aba90925fa72ea" }, { "name": "ksonnet", From b0b287dd15af99b5729240ede34753b0b3b44cb2 Mon Sep 17 00:00:00 2001 From: Lili Cosic Date: Tue, 27 Aug 2019 17:52:43 +0200 Subject: [PATCH 3/3] manifests/prometheus-rules.yaml: Regenerate file --- manifests/prometheus-rules.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/prometheus-rules.yaml b/manifests/prometheus-rules.yaml index 92b131c9..c5b92546 100644 --- a/manifests/prometheus-rules.yaml +++ b/manifests/prometheus-rules.yaml @@ -854,7 +854,7 @@ spec: message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync. expr: | - count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1 + count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1 for: 5m labels: severity: critical