bump rules

This commit is contained in:
Dmitry Verkhoturov
2018-11-07 12:59:40 +03:00
parent 0372a60d0c
commit 5a0835fa26
2 changed files with 58 additions and 58 deletions

View File

@@ -796,7 +796,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(max(node_filesystem_size{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"} - node_filesystem_avail{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:\n", "expr": "sum(max(node_filesystem_size_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"} - node_filesystem_avail_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)) by (pod,namespace)\n/ scalar(sum(max(node_filesystem_size_bytes{fstype=\u007e\"ext[234]|btrfs|xfs|zfs\"}) by (device,pod,namespace)))\n* on (namespace, pod) group_left (node) node_namespace_pod:kube_pod_info:\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{node}}", "legendFormat": "{{node}}",
@@ -1920,7 +1920,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "1 - avg(rate(node_cpu{mode=\"idle\"}[1m]))", "expr": "1 - avg(rate(node_cpu_seconds_total{mode=\"idle\"}[1m]))",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@@ -2172,7 +2172,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "1 - sum(:node_memory_MemFreeCachedBuffers:sum) / sum(:node_memory_MemTotal:sum)", "expr": "1 - sum(:node_memory_MemFreeCachedBuffers_bytes:sum) / sum(:node_memory_MemTotal_bytes:sum)",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@@ -2256,7 +2256,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(kube_pod_container_resource_requests_memory_bytes) / sum(:node_memory_MemTotal:sum)", "expr": "sum(kube_pod_container_resource_requests_memory_bytes) / sum(:node_memory_MemTotal_bytes:sum)",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@@ -2340,7 +2340,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "sum(kube_pod_container_resource_limits_memory_bytes) / sum(:node_memory_MemTotal:sum)", "expr": "sum(kube_pod_container_resource_limits_memory_bytes) / sum(:node_memory_MemTotal_bytes:sum)",
"format": "time_series", "format": "time_series",
"instant": true, "instant": true,
"intervalFactor": 2, "intervalFactor": 2,
@@ -5003,7 +5003,7 @@ items:
}, },
"yaxes": [ "yaxes": [
{ {
"format": "percentunit", "format": "short",
"label": null, "label": null,
"logBase": 1, "logBase": 1,
"max": null, "max": null,
@@ -5011,7 +5011,7 @@ items:
"show": true "show": true
}, },
{ {
"format": "percentunit", "format": "short",
"label": null, "label": null,
"logBase": 1, "logBase": 1,
"max": null, "max": null,
@@ -5064,7 +5064,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg by (cpu) (irate(node_cpu{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m])) * 100", "expr": "avg by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[5m])) * 100",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{cpu}}", "legendFormat": "{{cpu}}",
@@ -5076,7 +5076,7 @@ items:
], ],
"timeFrom": null, "timeFrom": null,
"timeShift": null, "timeShift": null,
"title": "System load", "title": "Usage Per Core",
"tooltip": { "tooltip": {
"shared": true, "shared": true,
"sort": 0, "sort": 0,
@@ -5168,7 +5168,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "avg (sum by (cpu) (irate(node_cpu{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n", "expr": "max (sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m])) ) * 100\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 10, "intervalFactor": 10,
"legendFormat": "{{ cpu }}", "legendFormat": "{{ cpu }}",
@@ -5276,7 +5276,7 @@ items:
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "avg(sum by (cpu) (irate(node_cpu{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n", "expr": "avg(sum by (cpu) (irate(node_cpu_seconds_total{job=\"node-exporter\", mode!=\"idle\", instance=\"$instance\"}[2m]))) * 100\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "" "legendFormat": ""
@@ -5352,28 +5352,28 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "max(\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n)\n", "expr": "max(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "memory used", "legendFormat": "memory used",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "max(node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"})", "expr": "max(node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"})",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "memory buffers", "legendFormat": "memory buffers",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "max(node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"})", "expr": "max(node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"})",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "memory cached", "legendFormat": "memory cached",
"refId": "C" "refId": "C"
}, },
{ {
"expr": "max(node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"})", "expr": "max(node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"})",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "memory free", "legendFormat": "memory free",
@@ -5481,7 +5481,7 @@ items:
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "max(\n (\n (\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n", "expr": "max(\n (\n (\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\"}\n - node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\"}\n )\n / node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n ) * 100)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "" "legendFormat": ""
@@ -5564,21 +5564,21 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "max(rate(node_disk_bytes_read{job=\"node-exporter\", instance=\"$instance\"}[2m]))", "expr": "max(rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\"}[2m]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "read", "legendFormat": "read",
"refId": "A" "refId": "A"
}, },
{ {
"expr": "max(rate(node_disk_bytes_written{job=\"node-exporter\", instance=\"$instance\"}[2m]))", "expr": "max(rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\"}[2m]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "written", "legendFormat": "written",
"refId": "B" "refId": "B"
}, },
{ {
"expr": "max(rate(node_disk_io_time_ms{job=\"node-exporter\", instance=\"$instance\"}[2m]))", "expr": "max(rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\"}[2m]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "io time", "legendFormat": "io time",
@@ -5773,7 +5773,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "max(rate(node_network_receive_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", "expr": "max(rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{device}}", "legendFormat": "{{device}}",
@@ -5864,7 +5864,7 @@ items:
"steppedLine": false, "steppedLine": false,
"targets": [ "targets": [
{ {
"expr": "max(rate(node_network_transmit_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))", "expr": "max(rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m]))",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "{{device}}", "legendFormat": "{{device}}",
@@ -5958,7 +5958,7 @@ items:
"options": [ "options": [
], ],
"query": "label_values(node_boot_time{job=\"node-exporter\"}, instance)", "query": "label_values(node_boot_time_seconds{job=\"node-exporter\"}, instance)",
"refresh": 2, "refresh": 2,
"regex": "", "regex": "",
"sort": 0, "sort": 0,

View File

@@ -122,17 +122,17 @@ spec:
record: 'node_namespace_pod:kube_pod_info:' record: 'node_namespace_pod:kube_pod_info:'
- expr: | - expr: |
count by (node) (sum by (node, cpu) ( count by (node) (sum by (node, cpu) (
node_cpu{job="node-exporter"} node_cpu_seconds_total{job="node-exporter"}
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
)) ))
record: node:node_num_cpu:sum record: node:node_num_cpu:sum
- expr: | - expr: |
1 - avg(rate(node_cpu{job="node-exporter",mode="idle"}[1m])) 1 - avg(rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m]))
record: :node_cpu_utilisation:avg1m record: :node_cpu_utilisation:avg1m
- expr: | - expr: |
1 - avg by (node) ( 1 - avg by (node) (
rate(node_cpu{job="node-exporter",mode="idle"}[1m]) rate(node_cpu_seconds_total{job="node-exporter",mode="idle"}[1m])
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info:) node_namespace_pod:kube_pod_info:)
record: node:node_cpu_utilisation:avg1m record: node:node_cpu_utilisation:avg1m
@@ -152,26 +152,26 @@ spec:
record: 'node:node_cpu_saturation_load1:' record: 'node:node_cpu_saturation_load1:'
- expr: | - expr: |
1 - 1 -
sum(node_memory_MemFree{job="node-exporter"} + node_memory_Cached{job="node-exporter"} + node_memory_Buffers{job="node-exporter"}) sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
/ /
sum(node_memory_MemTotal{job="node-exporter"}) sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: ':node_memory_utilisation:' record: ':node_memory_utilisation:'
- expr: | - expr: |
sum(node_memory_MemFree{job="node-exporter"} + node_memory_Cached{job="node-exporter"} + node_memory_Buffers{job="node-exporter"}) sum(node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
record: :node_memory_MemFreeCachedBuffers:sum record: :node_memory_MemFreeCachedBuffers_bytes:sum
- expr: | - expr: |
sum(node_memory_MemTotal{job="node-exporter"}) sum(node_memory_MemTotal_bytes{job="node-exporter"})
record: :node_memory_MemTotal:sum record: :node_memory_MemTotal_bytes:sum
- expr: | - expr: |
sum by (node) ( sum by (node) (
(node_memory_MemFree{job="node-exporter"} + node_memory_Cached{job="node-exporter"} + node_memory_Buffers{job="node-exporter"}) (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
record: node:node_memory_bytes_available:sum record: node:node_memory_bytes_available:sum
- expr: | - expr: |
sum by (node) ( sum by (node) (
node_memory_MemTotal{job="node-exporter"} node_memory_MemTotal_bytes{job="node-exporter"}
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
@@ -190,13 +190,13 @@ spec:
- expr: | - expr: |
1 - 1 -
sum by (node) ( sum by (node) (
(node_memory_MemFree{job="node-exporter"} + node_memory_Cached{job="node-exporter"} + node_memory_Buffers{job="node-exporter"}) (node_memory_MemFree_bytes{job="node-exporter"} + node_memory_Cached_bytes{job="node-exporter"} + node_memory_Buffers_bytes{job="node-exporter"})
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
/ /
sum by (node) ( sum by (node) (
node_memory_MemTotal{job="node-exporter"} node_memory_MemTotal_bytes{job="node-exporter"}
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
@@ -213,21 +213,21 @@ spec:
) )
record: node:node_memory_swap_io_bytes:sum_rate record: node:node_memory_swap_io_bytes:sum_rate
- expr: | - expr: |
avg(irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3) avg(irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]))
record: :node_disk_utilisation:avg_irate record: :node_disk_utilisation:avg_irate
- expr: | - expr: |
avg by (node) ( avg by (node) (
irate(node_disk_io_time_ms{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3 irate(node_disk_io_time_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m])
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
record: node:node_disk_utilisation:avg_irate record: node:node_disk_utilisation:avg_irate
- expr: | - expr: |
avg(irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3) avg(irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]))
record: :node_disk_saturation:avg_irate record: :node_disk_saturation:avg_irate
- expr: | - expr: |
avg by (node) ( avg by (node) (
irate(node_disk_io_time_weighted{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m]) / 1e3 irate(node_disk_io_time_weighted_seconds_total{job="node-exporter",device=~"(sd|xvd|nvme).+"}[1m])
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
@@ -241,25 +241,25 @@ spec:
max by (namespace, pod, device) (node_filesystem_avail{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size{fstype=~"ext[234]|btrfs|xfs|zfs"}) max by (namespace, pod, device) (node_filesystem_avail{fstype=~"ext[234]|btrfs|xfs|zfs"} / node_filesystem_size{fstype=~"ext[234]|btrfs|xfs|zfs"})
record: 'node:node_filesystem_avail:' record: 'node:node_filesystem_avail:'
- expr: | - expr: |
sum(irate(node_network_receive_bytes{job="node-exporter",device="eth0"}[1m])) + sum(irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m])) +
sum(irate(node_network_transmit_bytes{job="node-exporter",device="eth0"}[1m])) sum(irate(node_network_transmit_bytes_total{job="node-exporter",device="eth0"}[1m]))
record: :node_net_utilisation:sum_irate record: :node_net_utilisation:sum_irate
- expr: | - expr: |
sum by (node) ( sum by (node) (
(irate(node_network_receive_bytes{job="node-exporter",device="eth0"}[1m]) + (irate(node_network_receive_bytes_total{job="node-exporter",device="eth0"}[1m]) +
irate(node_network_transmit_bytes{job="node-exporter",device="eth0"}[1m])) irate(node_network_transmit_bytes_total{job="node-exporter",device="eth0"}[1m]))
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
record: node:node_net_utilisation:sum_irate record: node:node_net_utilisation:sum_irate
- expr: | - expr: |
sum(irate(node_network_receive_drop{job="node-exporter",device="eth0"}[1m])) + sum(irate(node_network_receive_drop_total{job="node-exporter",device="eth0"}[1m])) +
sum(irate(node_network_transmit_drop{job="node-exporter",device="eth0"}[1m])) sum(irate(node_network_transmit_drop_total{job="node-exporter",device="eth0"}[1m]))
record: :node_net_saturation:sum_irate record: :node_net_saturation:sum_irate
- expr: | - expr: |
sum by (node) ( sum by (node) (
(irate(node_network_receive_drop{job="node-exporter",device="eth0"}[1m]) + (irate(node_network_receive_drop_total{job="node-exporter",device="eth0"}[1m]) +
irate(node_network_transmit_drop{job="node-exporter",device="eth0"}[1m])) irate(node_network_transmit_drop_total{job="node-exporter",device="eth0"}[1m]))
* on (namespace, pod) group_left(node) * on (namespace, pod) group_left(node)
node_namespace_pod:kube_pod_info: node_namespace_pod:kube_pod_info:
) )
@@ -688,8 +688,8 @@ spec:
severity: warning severity: warning
- alert: KubeCronJobRunning - alert: KubeCronJobRunning
annotations: annotations:
message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking message: CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} is taking more
more than 1h to complete. than 1h to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecronjobrunning
expr: | expr: |
time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600 time() - kube_cronjob_next_schedule_time{job="kube-state-metrics"} > 3600
@@ -698,8 +698,8 @@ spec:
severity: warning severity: warning
- alert: KubeJobCompletion - alert: KubeJobCompletion
annotations: annotations:
message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than message: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
one hour to complete. than one hour to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
expr: | expr: |
kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0 kube_job_spec_completions{job="kube-state-metrics"} - kube_job_status_succeeded{job="kube-state-metrics"} > 0
@@ -739,7 +739,7 @@ spec:
expr: | expr: |
sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum) sum(namespace_name:kube_pod_container_resource_requests_memory_bytes:sum)
/ /
sum(node_memory_MemTotal) sum(node_memory_MemTotal_bytes)
> >
(count(node:node_num_cpu:sum)-1) (count(node:node_num_cpu:sum)-1)
/ /
@@ -766,7 +766,7 @@ spec:
expr: | expr: |
sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"}) sum(kube_resourcequota{job="kube-state-metrics", type="hard", resource="requests.memory"})
/ /
sum(node_memory_MemTotal{job="node-exporter"}) sum(node_memory_MemTotal_bytes{job="node-exporter"})
> 1.5 > 1.5
for: 5m for: 5m
labels: labels:
@@ -801,7 +801,7 @@ spec:
- alert: KubePersistentVolumeUsageCritical - alert: KubePersistentVolumeUsageCritical
annotations: annotations:
message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim message: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is only {{ printf "%0.0f" $value }} in Namespace {{ $labels.namespace }} is only {{ printf "%0.2f" $value
}}% free. }}% free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeusagecritical
expr: | expr: |
@@ -816,14 +816,14 @@ spec:
annotations: annotations:
message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim message: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} is expected to fill up within four }} in Namespace {{ $labels.namespace }} is expected to fill up within four
days. Currently {{ $value }} bytes are available. days. Currently {{ printf "%0.2f" $value }}% is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefullinfourdays
expr: | expr: |
( 100 * (
kubelet_volume_stats_used_bytes{job="kubelet"} kubelet_volume_stats_available_bytes{job="kubelet"}
/ /
kubelet_volume_stats_capacity_bytes{job="kubelet"} kubelet_volume_stats_capacity_bytes{job="kubelet"}
) > 0.85 ) < 15
and and
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0 predict_linear(kubelet_volume_stats_available_bytes{job="kubelet"}[6h], 4 * 24 * 3600) < 0
for: 5m for: 5m