jsonnet: Update to latest kubernetes-mixin
Pick up new alerts for unreachable nodes.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -45,6 +45,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/apiserver
|
||||
name: grafana-dashboard-apiserver
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/cluster-total
|
||||
name: grafana-dashboard-cluster-total
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/controller-manager
|
||||
name: grafana-dashboard-controller-manager
|
||||
readOnly: false
|
||||
@@ -69,6 +72,12 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
readOnly: false
|
||||
@@ -81,6 +90,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/pods
|
||||
name: grafana-dashboard-pods
|
||||
readOnly: false
|
||||
@@ -99,6 +111,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
readOnly: false
|
||||
nodeSelector:
|
||||
beta.kubernetes.io/os: linux
|
||||
securityContext:
|
||||
@@ -117,6 +132,9 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-apiserver
|
||||
name: grafana-dashboard-apiserver
|
||||
- configMap:
|
||||
name: grafana-dashboard-cluster-total
|
||||
name: grafana-dashboard-cluster-total
|
||||
- configMap:
|
||||
name: grafana-dashboard-controller-manager
|
||||
name: grafana-dashboard-controller-manager
|
||||
@@ -141,6 +159,12 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
- configMap:
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
- configMap:
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
@@ -153,6 +177,9 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
- configMap:
|
||||
name: grafana-dashboard-pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
- configMap:
|
||||
name: grafana-dashboard-pods
|
||||
name: grafana-dashboard-pods
|
||||
@@ -171,3 +198,6 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
- configMap:
|
||||
name: grafana-dashboard-workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
|
||||
@@ -79,6 +79,22 @@ spec:
|
||||
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])
|
||||
) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
||||
- expr: |
|
||||
container_memory_working_set_bytes{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_working_set_bytes
|
||||
- expr: |
|
||||
container_memory_rss{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_rss
|
||||
- expr: |
|
||||
container_memory_cache{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_cache
|
||||
- expr: |
|
||||
container_memory_swap{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_swap
|
||||
- expr: |
|
||||
sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace)
|
||||
record: namespace:container_memory_usage_bytes:sum
|
||||
@@ -583,6 +599,16 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||
has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
expr: |
|
||||
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
@@ -797,6 +823,14 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeNodeUnreachable
|
||||
annotations:
|
||||
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
|
||||
expr: |
|
||||
kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
message: There are {{ $value }} different semantic versions of Kubernetes
|
||||
|
||||
Reference in New Issue
Block a user