Merge pull request #2143 from BcTpe4HbIu/master

Fix AlertmanagerMembersInconsistent rule
This commit is contained in:
Frederic Branczyk
2018-11-23 09:44:04 +01:00
committed by GitHub
4 changed files with 116 additions and 3 deletions

View File

@@ -37,7 +37,7 @@
}, },
expr: ||| expr: |||
alertmanager_cluster_members{%(alertmanagerSelector)s} alertmanager_cluster_members{%(alertmanagerSelector)s}
!= on (service) != on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s}) count by (service) (alertmanager_cluster_members{%(alertmanagerSelector)s})
||| % $._config, ||| % $._config,
'for': '5m', 'for': '5m',

View File

@@ -24,6 +24,10 @@ tests:
- exp_labels: - exp_labels:
service: 'alertmanager-main' service: 'alertmanager-main'
severity: critical severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations: exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.' message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 17m - eval_time: 17m
@@ -32,6 +36,10 @@ tests:
- exp_labels: - exp_labels:
service: 'alertmanager-main' service: 'alertmanager-main'
severity: critical severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations: exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.' message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 23m - eval_time: 23m
@@ -40,5 +48,110 @@ tests:
- exp_labels: - exp_labels:
service: 'alertmanager-main' service: 'alertmanager-main'
severity: critical severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- interval: 1m
input_series:
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.0",namespace="monitoring",pod="alertmanager-main-0",service="alertmanager-main"}'
values: '3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.1",namespace="monitoring",pod="alertmanager-main-1",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
- series: 'alertmanager_cluster_members{job="alertmanager-main",instance="10.10.10.2",namespace="monitoring",pod="alertmanager-main-2",service="alertmanager-main"}'
values: '3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2'
alert_rule_test:
- eval_time: 5m
alertname: AlertmanagerMembersInconsistent
- eval_time: 11m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 17m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- eval_time: 23m
alertname: AlertmanagerMembersInconsistent
exp_alerts:
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.0
namespace: monitoring
pod: alertmanager-main-0
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.1
namespace: monitoring
pod: alertmanager-main-1
exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.'
- exp_labels:
service: 'alertmanager-main'
severity: critical
job: 'alertmanager-main'
instance: 10.10.10.2
namespace: monitoring
pod: alertmanager-main-2
exp_annotations: exp_annotations:
message: 'Alertmanager has not found all other members of the cluster.' message: 'Alertmanager has not found all other members of the cluster.'

View File

@@ -8,7 +8,7 @@
"subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus" "subdir": "contrib/kube-prometheus/jsonnet/kube-prometheus"
} }
}, },
"version": "dff8f44fbce268596c86b8d586c64c17953feab3" "version": "9cc151ced4308573a91f4cc3fcdbc951213b03e0"
}, },
{ {
"name": "ksonnet", "name": "ksonnet",

View File

@@ -790,7 +790,7 @@ spec:
message: Alertmanager has not found all other members of the cluster. message: Alertmanager has not found all other members of the cluster.
expr: | expr: |
alertmanager_cluster_members{job="alertmanager-main"} alertmanager_cluster_members{job="alertmanager-main"}
!= on (service) != on (service) GROUP_LEFT()
count by (service) (alertmanager_cluster_members{job="alertmanager-main"}) count by (service) (alertmanager_cluster_members{job="alertmanager-main"})
for: 5m for: 5m
labels: labels: