jsonnet: Update to latest kubernetes-mixin

Pick up new alerts for unreachable nodes.
This commit is contained in:
Clayton Coleman
2019-10-25 00:14:24 -04:00
parent 5686d7b439
commit 6e0ca7565f
6 changed files with 11599 additions and 100 deletions

View File

@@ -79,6 +79,22 @@ spec:
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])
) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
- expr: |
container_memory_working_set_bytes{job="kubelet", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_working_set_bytes
- expr: |
container_memory_rss{job="kubelet", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_rss
- expr: |
container_memory_cache{job="kubelet", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_cache
- expr: |
container_memory_swap{job="kubelet", image!=""}
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
record: node_namespace_pod_container:container_memory_swap
- expr: |
sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace)
record: namespace:container_memory_usage_bytes:sum
@@ -583,6 +599,16 @@ spec:
for: 15m
labels:
severity: critical
- alert: KubeContainerWaiting
annotations:
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
expr: |
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
for: 1h
labels:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
@@ -797,6 +823,14 @@ spec:
for: 15m
labels:
severity: warning
- alert: KubeNodeUnreachable
annotations:
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
expr: |
kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1
labels:
severity: warning
- alert: KubeVersionMismatch
annotations:
message: There are {{ $value }} different semantic versions of Kubernetes