Compare commits

...

16 Commits

Author SHA1 Message Date
Frederic Branczyk
a4a143a9ab Merge pull request #268 from kpucynski/grafana-dashboard-fix-0.1
Grafana dashboard update for release-0.1
2019-10-18 14:19:55 +02:00
Karol Pucynski
f5cfc463cf grafana dashboard updates from upstream
Signed-off-by: Karol Pucynski <kpucynski@gmail.com>
2019-10-18 13:20:03 +02:00
Matthias Loibl
f476f3a48c Use local dependency for kube-prometheus jsonnet 2019-10-17 18:24:03 +02:00
Frederic Branczyk
5e1758c453 Merge pull request #234 from minhdanh/fix-kubemismatchversion-incorrect-release-0.1
Fix KubeMismatchVersion false positive alert on Kubernetes < 1.14
2019-09-24 10:54:35 +02:00
Minh Danh
01ad823019 Update jsonnetfile.lock.json 2019-09-24 15:12:37 +07:00
Minh Danh
1a4c759122 Fix KubeMismatchVersion error in GKE 2019-09-24 15:06:51 +07:00
Frederic Branczyk
72edf488bd Merge pull request #197 from prune998/patch-2
use real alertmanager name
2019-08-28 15:40:09 +02:00
Prune Sebastien THOMAS
1d87031c24 use real alertmanager name
the name is built as `alertmanager-main` while it should be `alertmanager-<the user-defined name>`
2019-08-28 06:45:13 -04:00
Frederic Branczyk
79d0151ed4 Merge pull request #196 from LiliC/bringm2m
jsonnet/kube-prometheus: Prevent many-to-many matching
2019-08-28 10:05:42 +02:00
Lili Cosic
b0b287dd15 manifests/prometheus-rules.yaml: Regenerate file 2019-08-27 17:52:43 +02:00
Lili Cosic
08e093232e jsonnetfile.lock.json: jb update 2019-08-27 17:46:51 +02:00
Lili Cosic
579dbf34b1 jsonnet/kube-prometheus: Prevent many-to-many matching
If there is more than one prometheus-operator pod, which happens briefly
when we delete the prometheus-operator pod, we can see many-to-many
matching errors. This change whitelists the labels used for matching and
excludes the pod label.
2019-08-27 17:41:32 +02:00
Frederic Branczyk
76e357abfc Merge pull request #129 from brancz/fix-ne-0.1
Fix node-exporter semicolon
2019-06-18 18:09:48 +02:00
Frederic Branczyk
1a2883c9b3 manifests: Re-generate 2019-06-18 17:57:05 +02:00
Frederic Branczyk
2fde1a442d node-exporter: Fix missing semicolon 2019-06-18 17:55:35 +02:00
paulfantom
cfab64f66e make node_exporter pods tolerate every taint 2019-06-18 17:55:20 +02:00
9 changed files with 50 additions and 61 deletions

View File

@@ -10,7 +10,7 @@
message: 'The configuration of the instances of the Alertmanager cluster `{{$labels.service}}` are out of sync.',
},
expr: |||
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
count_values("config_hash", alertmanager_config_hash{%(alertmanagerSelector)s}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{%(prometheusOperatorSelector)s,controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
||| % $._config,
'for': '5m',
labels: {
@@ -31,8 +31,8 @@
},
},
{
alert:'AlertmanagerMembersInconsistent',
annotations:{
alert: 'AlertmanagerMembersInconsistent',
annotations: {
message: 'Alertmanager has not found all other members of the cluster.',
},
expr: |||

View File

@@ -86,7 +86,7 @@ local configMapList = k.core.v1.configMapList;
coreDNSSelector: 'job="kube-dns"',
podLabel: 'pod',
alertmanagerSelector: 'job="alertmanager-main",namespace="' + $._config.namespace + '"',
alertmanagerSelector: 'job="alertmanager-' + $._config.alertmanager.name + '",namespace="' + $._config.namespace + '"',
prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"',

View File

@@ -66,14 +66,8 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
local podLabels = { app: 'node-exporter' };
local noExecuteToleration = toleration.new() +
toleration.withOperator('Exists') +
toleration.withEffect('NoExecute');
local noScheduleToleration = toleration.new() +
toleration.withOperator('Exists') +
toleration.withEffect('NoSchedule');
local existsToleration = toleration.new() +
toleration.withOperator('Exists');
local procVolumeName = 'proc';
local procVolume = volume.fromHostPath(procVolumeName, '/proc');
local procVolumeMount = containerVolumeMount.new(procVolumeName, '/host/proc');
@@ -136,7 +130,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
daemonset.mixin.metadata.withLabels(podLabels) +
daemonset.mixin.spec.selector.withMatchLabels(podLabels) +
daemonset.mixin.spec.template.metadata.withLabels(podLabels) +
daemonset.mixin.spec.template.spec.withTolerations([noExecuteToleration, noScheduleToleration]) +
daemonset.mixin.spec.template.spec.withTolerations([existsToleration]) +
daemonset.mixin.spec.template.spec.withNodeSelector({ 'beta.kubernetes.io/os': 'linux' }) +
daemonset.mixin.spec.template.spec.withContainers(c) +
daemonset.mixin.spec.template.spec.withVolumes([procVolume, sysVolume, rootVolume]) +

View File

@@ -3,12 +3,11 @@
{
"name": "kube-prometheus",
"source": {
"git": {
"remote": ".",
"subdir": "jsonnet/kube-prometheus"
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": "."
"version": ""
}
]
}

View File

@@ -3,12 +3,11 @@
{
"name": "kube-prometheus",
"source": {
"git": {
"remote": ".",
"subdir": "jsonnet/kube-prometheus"
"local": {
"directory": "jsonnet/kube-prometheus"
}
},
"version": "e85d2f3b64c65f81aec7093dda880376a6719fe1"
"version": ""
},
{
"name": "ksonnet",
@@ -28,7 +27,7 @@
"subdir": ""
}
},
"version": "af494738e1709998696ffbce9296063a20c80692"
"version": "d0e069002ba767676145fe5e29325720669499c6"
},
{
"name": "grafonnet",
@@ -38,7 +37,7 @@
"subdir": "grafonnet"
}
},
"version": "bcd95ffa00fc4a58d34832f88f4b366effeb63ad"
"version": "47db72da03fc4a7a0658a87791e13c3315a3a252"
},
{
"name": "grafana-builder",
@@ -48,7 +47,7 @@
"subdir": "grafana-builder"
}
},
"version": "de367fc28346fbf5a9afdef887ea20d9ffb7e927"
"version": "3fe9a46d5fe0b70cbcabec1d2054f8ac3b3faae7"
},
{
"name": "grafana",
@@ -58,7 +57,7 @@
"subdir": "grafana"
}
},
"version": "c27d2792764867cdaf6484f067cc875cb8aef2f6"
"version": "a5c2b4da6ca92064604d5a8a893dec07ddead136"
},
{
"name": "prometheus-operator",
@@ -78,7 +77,7 @@
"subdir": "Documentation/etcd-mixin"
}
},
"version": "d6280f9ea54849e5364545ca34bdac0a58317569"
"version": "9c48dfabff597b086cc98f34c960b77a0c569551"
}
]
}

View File

@@ -64,7 +64,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -150,7 +150,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -248,7 +248,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -334,7 +334,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -432,7 +432,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -518,7 +518,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -616,7 +616,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -702,7 +702,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -800,7 +800,7 @@ items:
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{node}}",
"legendLink": "/d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"legendLink": "./d/4ac4f123aae0ff6dbaf4f4f66120033b/k8s-node-rsrc-use",
"step": 10
}
],
@@ -891,7 +891,7 @@ items:
"options": [
],
"query": "label_values(kube_node_info, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -1834,7 +1834,7 @@ items:
"options": [
],
"query": "label_values(kube_node_info, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -2602,7 +2602,7 @@ items:
"decimals": 0,
"link": true,
"linkTooltip": "Drill down to pods",
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #A",
"thresholds": [
@@ -2620,7 +2620,7 @@ items:
"decimals": 0,
"link": true,
"linkTooltip": "Drill down to workloads",
"linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #B",
"thresholds": [
@@ -2728,7 +2728,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down to pods",
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -3021,7 +3021,7 @@ items:
"decimals": 0,
"link": true,
"linkTooltip": "Drill down to pods",
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #A",
"thresholds": [
@@ -3039,7 +3039,7 @@ items:
"decimals": 0,
"link": true,
"linkTooltip": "Drill down to workloads",
"linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
"pattern": "Value #B",
"thresholds": [
@@ -3147,7 +3147,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down to pods",
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
"pattern": "namespace",
"thresholds": [
@@ -3324,7 +3324,7 @@ items:
"options": [
],
"query": "label_values(node_cpu_seconds_total, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -3639,7 +3639,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -4058,7 +4058,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -4244,7 +4244,7 @@ items:
"options": [
],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -5207,7 +5207,7 @@ items:
"options": [
],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -5576,7 +5576,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -5941,7 +5941,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
"pattern": "pod",
"thresholds": [
@@ -6100,7 +6100,7 @@ items:
"options": [
],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,
@@ -6514,7 +6514,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"pattern": "workload",
"thresholds": [
@@ -6924,7 +6924,7 @@ items:
"decimals": 2,
"link": true,
"linkTooltip": "Drill down",
"linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
"pattern": "workload",
"thresholds": [
@@ -7110,7 +7110,7 @@ items:
"options": [
],
"query": "label_values(kube_pod_info, cluster)",
"query": "label_values(:kube_pod_info_node_count:, cluster)",
"refresh": 1,
"regex": "",
"sort": 2,

View File

@@ -1,4 +1,4 @@
apiVersion: apps/v1beta2
apiVersion: apps/v1
kind: Deployment
metadata:
labels:

View File

@@ -74,10 +74,7 @@ spec:
runAsUser: 65534
serviceAccountName: node-exporter
tolerations:
- effect: NoExecute
operator: Exists
- effect: NoSchedule
operator: Exists
- operator: Exists
volumes:
- hostPath:
path: /proc

View File

@@ -726,7 +726,7 @@ spec:
components running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
expr: |
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!="kube-dns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
count(count by (gitVersion) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"gitVersion","$1","gitVersion","(v[0-9]*.[0-9]*.[0-9]*).*"))) > 1
for: 1h
labels:
severity: warning
@@ -854,7 +854,7 @@ spec:
message: The configuration of the instances of the Alertmanager cluster `{{$labels.service}}`
are out of sync.
expr: |
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}, "service", "alertmanager-$1", "name", "(.*)") != 1
count_values("config_hash", alertmanager_config_hash{job="alertmanager-main",namespace="monitoring"}) BY (service) / ON(service) GROUP_LEFT() label_replace(max(prometheus_operator_spec_replicas{job="prometheus-operator",namespace="monitoring",controller="alertmanager"}) by (name, job, namespace, controller), "service", "alertmanager-$1", "name", "(.*)") != 1
for: 5m
labels:
severity: critical