[bot] [main] Automated version update

This commit is contained in:
paulfantom
2021-08-16 08:04:51 +00:00
committed by GitHub
parent 8d36d0d707
commit ad3fc8920e
32 changed files with 311 additions and 98 deletions

View File

@@ -248,32 +248,26 @@ spec:
rules:
# KubeCPUOvercommit: warning-severity rule for overcommitted CPU resource requests.
# NOTE: this span is a diff hunk whose +/- markers were lost, so every changed
# field appears twice. In each pair the first occurrence is the pre-change text
# and the second is the post-change text (this matches the hunk header's
# 32 old / 26 new line counts).
# Pre-change: a two-line description without a value, a ratio expression
# (summed requests / summed allocatable CPU, compared against
# (count - 1) / count and only evaluated when count > 1), and `for: 5m`.
# Post-change: the description reports `{{ $value }}` CPU shares, the expression
# is summed requests minus (summed allocatable - largest single allocatable
# value) > 0, gated by that remaining capacity also being > 0, and `for: 10m`.
# Presumably kube_node_status_allocatable has one series per node, so `count`
# is the node count and `max` is the largest node - confirm against the exporter.
- alert: KubeCPUOvercommit
annotations:
description: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
description: Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests.
expr: |
sum(namespace_cpu:kube_pod_container_resource_requests:sum{})
/
sum(kube_node_status_allocatable{resource="cpu"})
>
((count(kube_node_status_allocatable{resource="cpu"}) > 1) - 1) / count(kube_node_status_allocatable{resource="cpu"})
for: 5m
sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
and
(sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0
for: 10m
labels:
severity: warning
# KubeMemoryOvercommit: warning-severity rule for overcommitted memory resource requests.
# NOTE: this span is a diff hunk whose +/- markers were lost, so every changed
# field appears twice. In each pair the first occurrence is the pre-change text
# and the second is the post-change text (this matches the hunk header's
# 32 old / 26 new line counts).
# Pre-change: a description without a value, a ratio expression
# (summed requests / summed allocatable memory, compared against
# (count - 1) / count and only evaluated when count > 1), and `for: 5m`.
# Post-change: the description reports `{{ $value }}` bytes, the expression is
# summed requests minus (summed allocatable - largest single allocatable
# value) > 0, gated by that remaining capacity also being > 0, and `for: 10m`.
# Presumably kube_node_status_allocatable has one series per node, so `count`
# is the node count and `max` is the largest node - confirm against the exporter.
- alert: KubeMemoryOvercommit
annotations:
description: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
description: Cluster has overcommitted memory resource requests for Pods by {{ $value }} bytes and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
summary: Cluster has overcommitted memory resource requests.
expr: |
sum(namespace_memory:kube_pod_container_resource_requests:sum{})
/
sum(kube_node_status_allocatable{resource="memory"})
>
((count(kube_node_status_allocatable{resource="memory"}) > 1) - 1)
/
count(kube_node_status_allocatable{resource="memory"})
for: 5m
sum(namespace_memory:kube_pod_container_resource_requests:sum{}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
and
(sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0
for: 10m
labels:
severity: warning
- alert: KubeCPUQuotaOvercommit
@@ -418,9 +412,9 @@ spec:
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
summary: Kubernetes API server client is experiencing errors.
expr: |
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job)
(sum(rate(rest_client_requests_total{code=~"5.."}[5m])) by (instance, job, namespace)
/
sum(rate(rest_client_requests_total[5m])) by (instance, job))
sum(rate(rest_client_requests_total[5m])) by (instance, job, namespace))
> 0.01
for: 15m
labels: