Update kubernetes-mixin to remove KubeAPILatencyHigh & KubeAPIErrorsHigh
This commit is contained in:
@@ -300,11 +300,11 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
|||||||
_config+:: {
|
_config+:: {
|
||||||
namespace: 'monitoring',
|
namespace: 'monitoring',
|
||||||
},
|
},
|
||||||
grafanaDashboards+:: { // monitoring-mixin compatibility
|
grafanaDashboards+:: { // monitoring-mixin compatibility
|
||||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||||
},
|
},
|
||||||
grafana+:: {
|
grafana+:: {
|
||||||
dashboards+:: { // use this method to import your dashboards to Grafana
|
dashboards+:: { // use this method to import your dashboards to Grafana
|
||||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -2,11 +2,11 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
|
|||||||
_config+:: {
|
_config+:: {
|
||||||
namespace: 'monitoring',
|
namespace: 'monitoring',
|
||||||
},
|
},
|
||||||
grafanaDashboards+:: { // monitoring-mixin compatibility
|
grafanaDashboards+:: { // monitoring-mixin compatibility
|
||||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||||
},
|
},
|
||||||
grafana+:: {
|
grafana+:: {
|
||||||
dashboards+:: { // use this method to import your dashboards to Grafana
|
dashboards+:: { // use this method to import your dashboards to Grafana
|
||||||
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -69,8 +69,8 @@
|
|||||||
"subdir": ""
|
"subdir": ""
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
|
"version": "fba82a1c0bc225127b084e91bd142c99b1792cb6",
|
||||||
"sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
|
"sum": "hJ5n6OeumIpKYuZQHwxL/rtpAJaW/qTFE9oOA8RWd7w="
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source": {
|
"source": {
|
||||||
|
|||||||
@@ -1129,16 +1129,6 @@ spec:
|
|||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeCronJobRunning
|
|
||||||
annotations:
|
|
||||||
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more
|
|
||||||
than 1h to complete.
|
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
|
|
||||||
expr: |
|
|
||||||
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
|
|
||||||
for: 1h
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: KubeJobCompletion
|
- alert: KubeJobCompletion
|
||||||
annotations:
|
annotations:
|
||||||
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
||||||
@@ -1240,19 +1230,19 @@ spec:
|
|||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- alert: KubeQuotaExceeded
|
- alert: KubeQuotaFullyUsed
|
||||||
annotations:
|
annotations:
|
||||||
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||||
}} of its {{ $labels.resource }} quota.
|
}} of its {{ $labels.resource }} quota.
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
|
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
|
||||||
expr: |
|
expr: |
|
||||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||||
/ ignoring(instance, job, type)
|
/ ignoring(instance, job, type)
|
||||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||||
> 0.90
|
>= 1
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: info
|
||||||
- alert: CPUThrottlingHigh
|
- alert: CPUThrottlingHigh
|
||||||
annotations:
|
annotations:
|
||||||
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||||
@@ -1391,43 +1381,6 @@ spec:
|
|||||||
short: 6h
|
short: 6h
|
||||||
- name: kubernetes-system-apiserver
|
- name: kubernetes-system-apiserver
|
||||||
rules:
|
rules:
|
||||||
- alert: KubeAPILatencyHigh
|
|
||||||
annotations:
|
|
||||||
message: The API server has an abnormal latency of {{ $value }} seconds for
|
|
||||||
{{ $labels.verb }} {{ $labels.resource }}.
|
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
|
|
||||||
expr: |
|
|
||||||
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
|
|
||||||
>
|
|
||||||
1
|
|
||||||
and on (verb,resource)
|
|
||||||
(
|
|
||||||
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
|
|
||||||
>
|
|
||||||
on (verb) group_left()
|
|
||||||
(
|
|
||||||
avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
|
||||||
+
|
|
||||||
2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
|
||||||
)
|
|
||||||
) > on (verb) group_left()
|
|
||||||
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: KubeAPIErrorsHigh
|
|
||||||
annotations:
|
|
||||||
message: API server is returning errors for {{ $value | humanizePercentage
|
|
||||||
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
|
|
||||||
}}.
|
|
||||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
|
|
||||||
expr: |
|
|
||||||
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
|
|
||||||
/
|
|
||||||
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
- alert: KubeClientCertificateExpiration
|
- alert: KubeClientCertificateExpiration
|
||||||
annotations:
|
annotations:
|
||||||
message: A client certificate used to authenticate to the apiserver is expiring
|
message: A client certificate used to authenticate to the apiserver is expiring
|
||||||
|
|||||||
Reference in New Issue
Block a user