Merge pull request #247 from LiliC/bump-mixins

Bump dependencies
Lili Cosic
2019-09-26 15:14:25 +02:00
committed by GitHub
4 changed files with 64 additions and 39 deletions


@@ -27,7 +27,7 @@
         "subdir": ""
       }
     },
-    "version": "e3d6d8ebb1789af0e17fb1f60171aaf64926a3a1"
+    "version": "d9b461b0692ddfff6c5d2a189443cfe4beefb3b2"
   },
   {
     "name": "grafonnet",
@@ -47,7 +47,7 @@
         "subdir": "grafana-builder"
       }
     },
-    "version": "3c44dfa9bfe2b66985733d4b16e0afd29094b4a0"
+    "version": "2b9b14d0d91adf8781e5b2c9b62dc8cb180a9886"
   },
   {
     "name": "grafana",
@@ -57,7 +57,7 @@
         "subdir": "grafana"
       }
     },
-    "version": "c27d2792764867cdaf6484f067cc875cb8aef2f6"
+    "version": "5df496bc1199b40bd066a8c228d94d9653173645"
   },
   {
     "name": "prometheus-operator",
@@ -77,7 +77,7 @@
         "subdir": "Documentation/etcd-mixin"
       }
     },
-    "version": "7948f39790fbbc979729ca6f990740a20d4a2a76"
+    "version": "efd1fc634b58a629903990e605f2cb9d5633706d"
   },
   {
     "name": "prometheus",
@@ -87,7 +87,7 @@
         "subdir": "documentation/prometheus-mixin"
       }
     },
-    "version": "3638e4ab18ac320c3ed0b607f07aea309dadee45"
+    "version": "08c55c119f39093e18b2bb9cba5c5619dc4ea0e1"
   },
   {
     "name": "node-mixin",
@@ -97,7 +97,7 @@
         "subdir": "docs/node-mixin"
      }
     },
-    "version": "e7c2dbed4e0278731b59e9870eb9a9d046047aa8"
+    "version": "27b8c93a5afc21632239890c4558c7300cca17d2"
   },
   {
     "name": "promgrafonnet",
@@ -107,7 +107,7 @@
         "subdir": "lib/promgrafonnet"
       }
     },
-    "version": "24ea0d6e33a415e07ec7b675d74dea3cf01fde73"
+    "version": "d9b461b0692ddfff6c5d2a189443cfe4beefb3b2"
   }
 ]
}
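Note: these hashes pin each vendored mixin (kubernetes-mixin, grafonnet, grafana-builder, etcd-mixin, prometheus-mixin, node-mixin, promgrafonnet) to a git commit. This looks like jsonnet-bundler's lock file, which `jb update` rewrites when dependencies are bumped; everything below is the regenerated output of those newer mixins.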


@@ -5485,7 +5485,7 @@ items:
                 "unit": "bytes"
               },
               {
-                "alias": "Memory Usage (Swap",
+                "alias": "Memory Usage (Swap)",
                 "colorMode": null,
                 "colors": [
@@ -19485,7 +19485,7 @@ items:
               "options": [
               ],
-              "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\"}, cluster=\"$cluster\", namespace)",
+              "query": "label_values(kube_statefulset_metadata_generation{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)",
               "refresh": 2,
               "regex": "",
               "sort": 0,


@@ -1,4 +1,4 @@
-apiVersion: apps/v1beta2
+apiVersion: apps/v1
 kind: Deployment
 metadata:
   labels:
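Note: apps/v1beta2 has been deprecated since the apps/v1 API went GA in Kubernetes 1.9, and it is removed entirely in 1.16, so the Deployment manifest has to move to apps/v1 to keep applying cleanly.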


@@ -497,7 +497,7 @@ spec:
           state for longer than 15 minutes.
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
       expr: |
-        sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"}) > 0
+        sum by (namespace, pod) (kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"} * on(namespace, pod) group_left(owner_kind) kube_pod_owner{owner_kind!="Job"}) > 0
       for: 15m
       labels:
         severity: critical
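Note: the new KubePodNotReady expression joins against kube_pod_owner so that pods owned by Jobs — which legitimately sit in Failed or Pending phases after running — stop firing the alert. A sketch of the join, using the series from the diff:

    # Left side: one series per pod in a not-ready phase.
    kube_pod_status_phase{job="kube-state-metrics", phase=~"Failed|Pending|Unknown"}
    # on(namespace, pod) matches series by pod identity only;
    # group_left(owner_kind) copies owner_kind in from the right side.
    # Multiplication acts as an inner join, so any pod with no
    # matching non-Job owner drops out of the result entirely.
    * on(namespace, pod) group_left(owner_kind)
    kube_pod_owner{owner_kind!="Job"}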
@@ -630,7 +630,33 @@ spec:
         message: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
       expr: |
-        kube_job_status_failed{job="kube-state-metrics"} > 0
+        kube_job_failed{job="kube-state-metrics"} > 0
+      for: 15m
+      labels:
+        severity: warning
+    - alert: KubeHpaReplicasMismatch
+      annotations:
+        message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the
+          desired number of replicas for longer than 15 minutes.
+        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
+      expr: |
+        (kube_hpa_status_desired_replicas{job="kube-state-metrics"}
+          !=
+        kube_hpa_status_current_replicas{job="kube-state-metrics"})
+          and
+        changes(kube_hpa_status_current_replicas[15m]) == 0
+      for: 15m
+      labels:
+        severity: warning
+    - alert: KubeHpaMaxedOut
+      annotations:
+        message: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at
+          max replicas for longer than 15 minutes.
+        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
+      expr: |
+        kube_hpa_status_current_replicas{job="kube-state-metrics"}
+        ==
+        kube_hpa_spec_max_replicas{job="kube-state-metrics"}
       for: 15m
       labels:
         severity: warning
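Note: KubeJobFailed now alerts on kube_job_failed rather than the raw kube_job_status_failed metric, and the bump brings in two new HPA alerts. KubeHpaReplicasMismatch only fires when the replica count is both wrong and flat — the changes() guard distinguishes a stuck HPA from one that is actively converging on the desired count. That guard in isolation:

    # changes() counts how often the gauge's value changed in the window;
    # 0 means current_replicas has not moved at all in 15 minutes.
    changes(kube_hpa_status_current_replicas[15m]) == 0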
@@ -761,7 +787,7 @@ spec:
     rules:
     - alert: KubeNodeNotReady
       annotations:
-        message: '{{ $labels.node }} has been unready for more than an hour.'
+        message: '{{ $labels.node }} has been unready for more than 15 minutes.'
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodenotready
       expr: |
         kube_node_status_condition{job="kube-state-metrics",condition="Ready",status="true"} == 0
@@ -791,23 +817,13 @@ spec:
       for: 15m
       labels:
         severity: warning
-    - alert: KubeClientErrors
-      annotations:
-        message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
-          }}' is experiencing {{ printf "%0.0f" $value }} errors / second.
-        runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
-      expr: |
-        sum(rate(ksm_scrape_error_total{job="kube-state-metrics"}[5m])) by (instance, job) > 0.1
-      for: 15m
-      labels:
-        severity: warning
     - alert: KubeletTooManyPods
       annotations:
-        message: Kubelet {{ $labels.instance }} is running {{ $value }} Pods, close
-          to the limit of 110.
+        message: Kubelet '{{ $labels.node }}' is running at {{ printf "%.4g" $value
+          }}% of its Pod capacity.
         runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
       expr: |
-        kubelet_running_pod_count{job="kubelet"} > 110 * 0.9
+        100 * max(max(kubelet_running_pod_count{job="kubelet"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by(node) > 95
       for: 15m
       labels:
         severity: warning
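Note: the rewritten KubeletTooManyPods compares each node's running pod count against that node's actual capacity instead of hard-coding the default limit of 110 pods. The same expression, reformatted for readability:

    100
    * max(
        max(kubelet_running_pod_count{job="kubelet"}) by (instance)
        # kubelet_node_name carries both 'instance' and 'node' labels,
        # so this join re-keys the pod count from instance to node.
        * on(instance) group_left(node) kubelet_node_name{job="kubelet"}
      ) by (node)
    / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by (node)
    > 95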
@@ -991,17 +1007,6 @@ spec:
       for: 4h
       labels:
         severity: warning
-    - alert: PrometheusTSDBWALCorruptions
-      annotations:
-        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected
-          {{$value | humanize}} corruptions of the write-ahead log (WAL) over the
-          last 3h.
-        summary: Prometheus is detecting WAL corruptions.
-      expr: |
-        increase(tsdb_wal_corruptions_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
-      for: 4h
-      labels:
-        severity: warning
     - alert: PrometheusNotIngestingSamples
       annotations:
         description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting
@@ -1015,7 +1020,8 @@ spec:
     - alert: PrometheusDuplicateTimestamps
       annotations:
         description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
-          {{$value | humanize}} samples/s with different values but duplicated timestamp.
+          {{ printf "%.4g" $value }} samples/s with different values but duplicated
+          timestamp.
         summary: Prometheus is dropping samples with duplicate timestamps.
       expr: |
         rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -1025,7 +1031,7 @@ spec:
     - alert: PrometheusOutOfOrderTimestamps
       annotations:
         description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping
-          {{$value | humanize}} samples/s with timestamps arriving out of order.
+          {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
         summary: Prometheus drops samples with out-of-order timestamps.
       expr: |
         rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -1069,6 +1075,25 @@ spec:
       for: 15m
       labels:
         severity: critical
+    - alert: PrometheusRemoteWriteDesiredShards
+      annotations:
+        description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write
+          desired shards calculation wants to run {{ printf $value }} shards, which
+          is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
+          $labels.instance | query | first | value }}.
+        summary: Prometheus remote write desired shards calculation wants to run more
+          than configured max shards.
+      expr: |
+        # Without max_over_time, failed scrapes could create false negatives, see
+        # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
+        (
+          max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s",namespace="monitoring"}[5m])
+        > on(job, instance) group_right
+          max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s",namespace="monitoring"}[5m])
+        )
+      for: 15m
+      labels:
+        severity: warning
     - alert: PrometheusRuleFailures
       annotations:
         description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to
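Note: the new PrometheusRemoteWriteDesiredShards alert follows the standard gauge-alerting pattern: comparing raw gauge samples can flap when a scrape fails, so both sides are smoothed with max_over_time (see the robustperception.io link embedded in the expression). The bare comparison, stripped of the alert plumbing:

    # max_over_time takes the highest sample in the 5m window, so one
    # missed scrape cannot silently hide a desired > max condition.
    max_over_time(prometheus_remote_storage_shards_desired{job="prometheus-k8s"}[5m])
    > on(job, instance) group_right
    max_over_time(prometheus_remote_storage_shards_max{job="prometheus-k8s"}[5m])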