kube-prometheus: Re-generate

This commit is contained in:
Nick
2019-04-02 08:11:36 +01:00
parent 7d1c0bdd73
commit 1472a90279
3 changed files with 31 additions and 17 deletions

View File

@@ -28,7 +28,7 @@
"subdir": "" "subdir": ""
} }
}, },
"version": "b8b1a40066bd40bf7612bbb1cc9208f76530f44a" "version": "0669b548b8bc981f2676e7ec70c8f4a05fa39aa7"
}, },
{ {
"name": "grafonnet", "name": "grafonnet",

View File

@@ -6855,7 +6855,7 @@ items:
"options": [ "options": [
], ],
"query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, exported_namespace)", "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 0, "sort": 0,
@@ -6881,7 +6881,7 @@ items:
"options": [ "options": [
], ],
"query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", exported_namespace=\"$namespace\"}, persistentvolumeclaim)", "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 0, "sort": 0,
@@ -7013,14 +7013,14 @@ items:
"refId": "A" "refId": "A"
}, },
{ {
"expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Requested: {{ container }}", "legendFormat": "Requested: {{ container }}",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container=~\"$container\"})", "expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "Limit: {{ container }}", "legendFormat": "Limit: {{ container }}",
@@ -7124,11 +7124,25 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", cluster=\"$cluster\", namespace=\"$namespace\", image!=\"\", pod_name=\"$pod\", container_name=~\"$container\", container_name!=\"POD\"}[1m]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{ container_name }}", "legendFormat": "Current: {{ container_name }}",
"refId": "A" "refId": "A"
},
{
"expr": "sum by(container) (kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Requested: {{ container }}",
"refId": "B"
},
{
"expr": "sum by(container) (kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\", pod=\"$pod\", container=~\"$container\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Limit: {{ container }}",
"refId": "C"
} }
], ],
"thresholds": [ "thresholds": [

View File

@@ -37,14 +37,14 @@ spec:
record: namespace_name:container_memory_usage_bytes:sum record: namespace_name:container_memory_usage_bytes:sum
- expr: | - expr: |
sum by (namespace, label_name) ( sum by (namespace, label_name) (
sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"}) by (namespace, pod) sum(kube_pod_container_resource_requests_memory_bytes{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
* on (namespace, pod) group_left(label_name) * on (namespace, pod) group_left(label_name)
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
) )
record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum record: namespace_name:kube_pod_container_resource_requests_memory_bytes:sum
- expr: | - expr: |
sum by (namespace, label_name) ( sum by (namespace, label_name) (
sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} and on(pod) kube_pod_status_scheduled{condition="true"}) by (namespace, pod) sum(kube_pod_container_resource_requests_cpu_cores{job="kube-state-metrics"} * on (endpoint, instance, job, namespace, pod, service) group_left(phase) (kube_pod_status_phase{phase=~"^(Pending|Running)$"} == 1)) by (namespace, pod)
* on (namespace, pod) group_left(label_name) * on (namespace, pod) group_left(label_name)
label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)") label_replace(kube_pod_labels{job="kube-state-metrics"}, "pod_name", "$1", "pod", "(.*)")
) )
@@ -235,11 +235,11 @@ spec:
) )
record: node:node_disk_utilisation:avg_irate record: node:node_disk_utilisation:avg_irate
- expr: | - expr: |
avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3) avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]))
record: :node_disk_saturation:avg_irate record: :node_disk_saturation:avg_irate
- expr: | - expr: |
avg by (node) ( avg by (node) (
irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m]) / 1e3 irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
@@ -603,7 +603,7 @@ spec:
message: Cluster has overcommitted CPU resource requests for Namespaces. message: Cluster has overcommitted CPU resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
expr: | expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.cpu"}) sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="cpu"})
/ /
sum(node:node_num_cpu:sum) sum(node:node_num_cpu:sum)
> 1.5 > 1.5
@@ -615,7 +615,7 @@ spec:
message: Cluster has overcommitted memory resource requests for Namespaces. message: Cluster has overcommitted memory resource requests for Namespaces.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememovercommit
expr: | expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"}) sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="memory"})
/ /
sum(node_memory_MemTotal_bytes{job="node-exporter"}) sum(node_memory_MemTotal_bytes{job="node-exporter"})
> 1.5 > 1.5
@@ -813,19 +813,19 @@ spec:
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
message: A client certificate used to authenticate to the apiserver is expiring message: A client certificate used to authenticate to the apiserver is expiring
in less than 7 days. in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: | expr: |
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800 apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
labels: labels:
severity: warning severity: warning
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
message: A client certificate used to authenticate to the apiserver is expiring message: A client certificate used to authenticate to the apiserver is expiring
in less than 24 hours. in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
expr: | expr: |
histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400 apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0 and histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
labels: labels:
severity: critical severity: critical
- name: alertmanager.rules - name: alertmanager.rules