Update kubernetes-mixin to remove KubeAPILatencyHigh & KubeAPIErrorsHigh

This commit is contained in:
Matthias Loibl
2020-06-29 19:23:36 +02:00
parent 2c1fc1cc11
commit ea7a834755
4 changed files with 10 additions and 57 deletions

View File

@@ -300,11 +300,11 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
grafanaDashboards+:: { // monitoring-mixin compatibility
grafanaDashboards+:: { // monitoring-mixin compatibility
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
},
grafana+:: {
dashboards+:: { // use this method to import your dashboards to Grafana
dashboards+:: { // use this method to import your dashboards to Grafana
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
},
},

View File

@@ -2,11 +2,11 @@ local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
_config+:: {
namespace: 'monitoring',
},
grafanaDashboards+:: { // monitoring-mixin compatibility
grafanaDashboards+:: { // monitoring-mixin compatibility
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
},
grafana+:: {
dashboards+:: { // use this method to import your dashboards to Grafana
dashboards+:: { // use this method to import your dashboards to Grafana
'my-dashboard.json': (import 'example-grafana-dashboard.json'),
},
},

View File

@@ -69,8 +69,8 @@
"subdir": ""
}
},
"version": "b61c5a34051f8f57284a08fe78ad8a45b430252b",
"sum": "7Hx/5eNm7ubLTsdrpk3b2+e/FLR3XOa4HCukmbRUCAY="
"version": "fba82a1c0bc225127b084e91bd142c99b1792cb6",
"sum": "hJ5n6OeumIpKYuZQHwxL/rtpAJaW/qTFE9oOA8RWd7w="
},
{
"source": {

View File

@@ -1129,16 +1129,6 @@ spec:
for: 15m
labels:
severity: warning
- alert: KubeCronJobRunning
annotations:
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more
than 1h to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
expr: |
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
for: 1h
labels:
severity: warning
- alert: KubeJobCompletion
annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
@@ -1240,19 +1230,19 @@ spec:
for: 5m
labels:
severity: warning
- alert: KubeQuotaExceeded
- alert: KubeQuotaFullyUsed
annotations:
message: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
expr: |
kube_resourcequota{job="kube-state-metrics", type="used"}
/ ignoring(instance, job, type)
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
> 0.90
>= 1
for: 15m
labels:
severity: warning
severity: info
- alert: CPUThrottlingHigh
annotations:
message: '{{ $value | humanizePercentage }} throttling of CPU in namespace
@@ -1391,43 +1381,6 @@ spec:
short: 6h
- name: kubernetes-system-apiserver
rules:
- alert: KubeAPILatencyHigh
annotations:
message: The API server has an abnormal latency of {{ $value }} seconds for
{{ $labels.verb }} {{ $labels.resource }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapilatencyhigh
expr: |
cluster_quantile:apiserver_request_duration_seconds:histogram_quantile{job="apiserver",quantile="0.99"}
>
1
and on (verb,resource)
(
cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"}
>
on (verb) group_left()
(
avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
+
2*stddev by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
)
) > on (verb) group_left()
1.2 * avg by (verb) (cluster:apiserver_request_duration_seconds:mean5m{job="apiserver"} >= 0)
for: 5m
labels:
severity: warning
- alert: KubeAPIErrorsHigh
annotations:
message: API server is returning errors for {{ $value | humanizePercentage
}} of requests for {{ $labels.verb }} {{ $labels.resource }} {{ $labels.subresource
}}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeapierrorshigh
expr: |
sum(rate(apiserver_request_total{job="apiserver",code=~"5.."}[5m])) by (resource,subresource,verb)
/
sum(rate(apiserver_request_total{job="apiserver"}[5m])) by (resource,subresource,verb) > 0.05
for: 10m
labels:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
message: A client certificate used to authenticate to the apiserver is expiring