Grafana dashboards update
This commit is contained in:
@@ -27,7 +27,7 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "d9b461b0692ddfff6c5d2a189443cfe4beefb3b2"
|
||||
"version": "3da19e5a40fbb9f7a621958040472db918c4de9c"
|
||||
},
|
||||
{
|
||||
"name": "grafonnet",
|
||||
@@ -47,7 +47,7 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "2b9b14d0d91adf8781e5b2c9b62dc8cb180a9886"
|
||||
"version": "e59d64a96a73e65ba53ba7fe05c9598974cc4a52"
|
||||
},
|
||||
{
|
||||
"name": "grafana",
|
||||
@@ -57,7 +57,7 @@
|
||||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "5df496bc1199b40bd066a8c228d94d9653173645"
|
||||
"version": "567be6b15b7f3b747c48dc7b111c1860cab121c7"
|
||||
},
|
||||
{
|
||||
"name": "prometheus-operator",
|
||||
@@ -77,7 +77,7 @@
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "efd1fc634b58a629903990e605f2cb9d5633706d"
|
||||
"version": "3ef2ad8e115449a7004b628a873e2629855ed468"
|
||||
},
|
||||
{
|
||||
"name": "prometheus",
|
||||
@@ -87,7 +87,7 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "08c55c119f39093e18b2bb9cba5c5619dc4ea0e1"
|
||||
"version": "b05b5f9a300b0209689c06d70f676291f23774c4"
|
||||
},
|
||||
{
|
||||
"name": "node-mixin",
|
||||
@@ -97,7 +97,7 @@
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "27b8c93a5afc21632239890c4558c7300cca17d2"
|
||||
"version": "9f49fff79ef85fcebb69289622150e6d5346528b"
|
||||
},
|
||||
{
|
||||
"name": "promgrafonnet",
|
||||
@@ -107,7 +107,7 @@
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "d9b461b0692ddfff6c5d2a189443cfe4beefb3b2"
|
||||
"version": "3da19e5a40fbb9f7a621958040472db918c4de9c"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
@@ -22,6 +22,7 @@ resources:
|
||||
- ./manifests/grafana-dashboardDefinitions.yaml
|
||||
- ./manifests/grafana-dashboardSources.yaml
|
||||
- ./manifests/grafana-deployment.yaml
|
||||
- ./manifests/grafana-rawDashboardDefinitions.yaml
|
||||
- ./manifests/grafana-service.yaml
|
||||
- ./manifests/grafana-serviceAccount.yaml
|
||||
- ./manifests/grafana-serviceMonitor.yaml
|
||||
|
@@ -3093,7 +3093,7 @@ items:
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #A",
|
||||
"thresholds": [
|
||||
|
||||
@@ -3111,7 +3111,7 @@ items:
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down to workloads",
|
||||
"linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #B",
|
||||
"thresholds": [
|
||||
|
||||
@@ -3219,7 +3219,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
"thresholds": [
|
||||
|
||||
@@ -3512,7 +3512,7 @@ items:
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #A",
|
||||
"thresholds": [
|
||||
|
||||
@@ -3530,7 +3530,7 @@ items:
|
||||
"decimals": 0,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down to workloads",
|
||||
"linkUrl": "/d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"linkUrl": "./d/a87fb0d919ec0ea5f6543124e16c42a5/k8s-resources-workloads-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell_1",
|
||||
"pattern": "Value #B",
|
||||
"thresholds": [
|
||||
|
||||
@@ -3638,7 +3638,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down to pods",
|
||||
"linkUrl": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"linkUrl": "./d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$__cell",
|
||||
"pattern": "namespace",
|
||||
"thresholds": [
|
||||
|
||||
@@ -4130,7 +4130,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
"thresholds": [
|
||||
|
||||
@@ -4549,7 +4549,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
"thresholds": [
|
||||
|
||||
@@ -6067,7 +6067,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
"thresholds": [
|
||||
|
||||
@@ -6432,7 +6432,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"linkUrl": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell",
|
||||
"pattern": "pod",
|
||||
"thresholds": [
|
||||
|
||||
@@ -7005,7 +7005,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
|
||||
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
|
||||
"pattern": "workload",
|
||||
"thresholds": [
|
||||
|
||||
@@ -7415,7 +7415,7 @@ items:
|
||||
"decimals": 2,
|
||||
"link": true,
|
||||
"linkTooltip": "Drill down",
|
||||
"linkUrl": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
|
||||
"linkUrl": "./d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-workload=$__cell&var-type=$__cell_2",
|
||||
"pattern": "workload",
|
||||
"thresholds": [
|
||||
|
||||
|
@@ -242,7 +242,7 @@ spec:
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} < 0.4
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
@@ -260,7 +260,7 @@ spec:
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} < 0.2
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 20
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
@@ -308,7 +308,7 @@ spec:
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} < 0.4
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
@@ -326,7 +326,7 @@ spec:
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} < 0.2
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
@@ -573,13 +573,13 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
message: Only {{ $value }}% of the desired Pods of DaemonSet {{ $labels.namespace
|
||||
}}/{{ $labels.daemonset }} are scheduled and ready.
|
||||
message: Only {{ $value | humanizePercentage }} of the desired Pods of DaemonSet
|
||||
{{ $labels.namespace }}/{{ $labels.daemonset }} are scheduled and ready.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
|
||||
expr: |
|
||||
kube_daemonset_status_number_ready{job="kube-state-metrics"}
|
||||
/
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} * 100 < 100
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics"} < 1.00
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -718,25 +718,28 @@ spec:
|
||||
severity: warning
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
message: Namespace {{ $labels.namespace }} is using {{ printf "%0.0f" $value
|
||||
}}% of its {{ $labels.resource }} quota.
|
||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
||||
expr: |
|
||||
100 * kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
> 90
|
||||
> 0.90
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
message: '{{ printf "%0.0f" $value }}% throttling of CPU in namespace {{ $labels.namespace
|
||||
}} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
|
||||
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
|
||||
$labels.pod }}.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
|
||||
expr: "100 * sum(increase(container_cpu_cfs_throttled_periods_total{container!=\"\",
|
||||
}[5m])) by (container, pod, namespace)\n /\nsum(increase(container_cpu_cfs_periods_total{}[5m]))
|
||||
by (container, pod, namespace)\n > 25 \n"
|
||||
expr: |
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container, pod, namespace)
|
||||
/
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container, pod, namespace)
|
||||
> ( 25 / 100 )
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -745,14 +748,14 @@ spec:
|
||||
- alert: KubePersistentVolumeUsageCritical
|
||||
annotations:
|
||||
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is only {{ printf "%0.2f" $value
|
||||
}}% free.
|
||||
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
|
||||
}} free.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
|
||||
expr: |
|
||||
100 * kubelet_volume_stats_available_bytes{job="kubelet"}
|
||||
kubelet_volume_stats_available_bytes{job="kubelet"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet"}
|
||||
< 3
|
||||
< 0.03
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -760,14 +763,14 @@ spec:
|
||||
annotations:
|
||||
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} is expected to fill up within four
|
||||
days. Currently {{ printf "%0.2f" $value }}% is available.
|
||||
days. Currently {{ $value | humanizePercentage }} is available.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
|
||||
expr: |
|
||||
100 * (
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet"}
|
||||
) < 15
|
||||
) < 0.15
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0
|
||||
for: 5m
|
||||
@@ -807,23 +810,23 @@ spec:
|
||||
- alert: KubeClientErrors
|
||||
annotations:
|
||||
message: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
|
||||
}}' is experiencing {{ printf "%0.0f" $value }}% errors.'
|
||||
}}' is experiencing {{ $value | humanizePercentage }} errors.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
|
||||
expr: |
|
||||
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
|
||||
/
|
||||
sum(rate(rest_client_requests_total[5m])) by (instance, job))
|
||||
* 100 > 1
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeletTooManyPods
|
||||
annotations:
|
||||
message: Kubelet '{{ $labels.node }}' is running at {{ printf "%.4g" $value
|
||||
}}% of its Pod capacity.
|
||||
message: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
|
||||
}} of its Pod capacity.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
|
||||
expr: |
|
||||
100 * max(max(kubelet_running_pod_count{job="kubelet"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by(node) > 95
|
||||
max(max(kubelet_running_pod_count{job="kubelet"}) by(instance) * on(instance) group_left(node) kubelet_node_name{job="kubelet"}) by(node) / max(kube_node_status_capacity_pods{job="kube-state-metrics"}) by(node) > 0.95
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -849,47 +852,51 @@ spec:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests.
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m]))
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 3
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.03
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests.
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m]))
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) * 100 > 1
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) > 0.01
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests for
|
||||
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
|
||||
}}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 10
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPIErrorsHigh
|
||||
annotations:
|
||||
message: API server is returning errors for {{ $value }}% of requests for
|
||||
{{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource }}.
|
||||
message: API server is returning errors for {{ $value | humanizePercentage
|
||||
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
|
||||
}}.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
||||
expr: |
|
||||
sum(rate(apiserver_request_total{job="apiserver",code=~"^(?:5..)$"}[5m])) by (resource,subresource,verb)
|
||||
/
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) * 100 > 5
|
||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
|
Reference in New Issue
Block a user