jsonnet: Update to latest kubernetes-mixin
Pick up new alerts for unreachable nodes.
This commit is contained in:
48
README.md
48
README.md
@@ -573,34 +573,34 @@ You can define ServiceMonitor resources in your `jsonnet` spec. See the snippet
|
||||
```jsonnet
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
prometheus+:: {
|
||||
namespaces+: ['my-namespace', 'my-second-namespace'],
|
||||
}
|
||||
},
|
||||
namespace: 'monitoring',
|
||||
prometheus+:: {
|
||||
serviceMonitorMyNamespace: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'my-servicemonitor',
|
||||
namespace: 'my-namespace',
|
||||
namespaces+: ['my-namespace', 'my-second-namespace'],
|
||||
},
|
||||
},
|
||||
prometheus+:: {
|
||||
serviceMonitorMyNamespace: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'my-servicemonitor',
|
||||
namespace: 'my-namespace',
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'http-metrics',
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'http-metrics',
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'app': 'myapp',
|
||||
},
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
app: 'myapp',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -1,33 +1,33 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
||||
_config+:: {
|
||||
namespace: 'monitoring',
|
||||
prometheus+:: {
|
||||
namespaces+: ['my-namespace', 'my-second-namespace'],
|
||||
}
|
||||
},
|
||||
namespace: 'monitoring',
|
||||
prometheus+:: {
|
||||
serviceMonitorMyNamespace: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'my-servicemonitor',
|
||||
namespace: 'my-namespace',
|
||||
namespaces+: ['my-namespace', 'my-second-namespace'],
|
||||
},
|
||||
},
|
||||
prometheus+:: {
|
||||
serviceMonitorMyNamespace: {
|
||||
apiVersion: 'monitoring.coreos.com/v1',
|
||||
kind: 'ServiceMonitor',
|
||||
metadata: {
|
||||
name: 'my-servicemonitor',
|
||||
namespace: 'my-namespace',
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'http-metrics',
|
||||
},
|
||||
spec: {
|
||||
jobLabel: 'app',
|
||||
endpoints: [
|
||||
{
|
||||
port: 'http-metrics',
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
'app': 'myapp',
|
||||
},
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
matchLabels: {
|
||||
app: 'myapp',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "15ddfa20a6921ffbd43172eb54f6bdc1bcf8d3d3"
|
||||
"version": "a08a6754d527eae13c553bd7565d5f4e9e9f3c0e"
|
||||
},
|
||||
{
|
||||
"name": "grafonnet",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -45,6 +45,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/apiserver
|
||||
name: grafana-dashboard-apiserver
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/cluster-total
|
||||
name: grafana-dashboard-cluster-total
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/controller-manager
|
||||
name: grafana-dashboard-controller-manager
|
||||
readOnly: false
|
||||
@@ -69,6 +72,12 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-pod
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
readOnly: false
|
||||
@@ -81,6 +90,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/pods
|
||||
name: grafana-dashboard-pods
|
||||
readOnly: false
|
||||
@@ -99,6 +111,9 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
readOnly: false
|
||||
nodeSelector:
|
||||
beta.kubernetes.io/os: linux
|
||||
securityContext:
|
||||
@@ -117,6 +132,9 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-apiserver
|
||||
name: grafana-dashboard-apiserver
|
||||
- configMap:
|
||||
name: grafana-dashboard-cluster-total
|
||||
name: grafana-dashboard-cluster-total
|
||||
- configMap:
|
||||
name: grafana-dashboard-controller-manager
|
||||
name: grafana-dashboard-controller-manager
|
||||
@@ -141,6 +159,12 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
- configMap:
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
- configMap:
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
@@ -153,6 +177,9 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
- configMap:
|
||||
name: grafana-dashboard-pod-total
|
||||
name: grafana-dashboard-pod-total
|
||||
- configMap:
|
||||
name: grafana-dashboard-pods
|
||||
name: grafana-dashboard-pods
|
||||
@@ -171,3 +198,6 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-statefulset
|
||||
name: grafana-dashboard-statefulset
|
||||
- configMap:
|
||||
name: grafana-dashboard-workload-total
|
||||
name: grafana-dashboard-workload-total
|
||||
|
||||
@@ -79,6 +79,22 @@ spec:
|
||||
rate(container_cpu_usage_seconds_total{job="kubelet", image!="", container!="POD"}[5m])
|
||||
) * on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate
|
||||
- expr: |
|
||||
container_memory_working_set_bytes{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_working_set_bytes
|
||||
- expr: |
|
||||
container_memory_rss{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_rss
|
||||
- expr: |
|
||||
container_memory_cache{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_cache
|
||||
- expr: |
|
||||
container_memory_swap{job="kubelet", image!=""}
|
||||
* on (namespace, pod) group_left(node) max by(namespace, pod, node) (kube_pod_info)
|
||||
record: node_namespace_pod_container:container_memory_swap
|
||||
- expr: |
|
||||
sum(container_memory_usage_bytes{job="kubelet", image!="", container!="POD"}) by (namespace)
|
||||
record: namespace:container_memory_usage_bytes:sum
|
||||
@@ -583,6 +599,16 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
message: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}}
|
||||
has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
|
||||
expr: |
|
||||
sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{job="kube-state-metrics"}) > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
message: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
@@ -797,6 +823,14 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeNodeUnreachable
|
||||
annotations:
|
||||
message: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
|
||||
expr: |
|
||||
kube_node_spec_taint{job="kube-state-metrics",key="node.kubernetes.io/unreachable",effect="NoSchedule"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeVersionMismatch
|
||||
annotations:
|
||||
message: There are {{ $value }} different semantic versions of Kubernetes
|
||||
|
||||
Reference in New Issue
Block a user