jb update

This commit is contained in:
Jesse Bye
2020-09-30 14:04:09 -07:00
parent ad4462e506
commit f0755dcaa8
12 changed files with 2512 additions and 8125 deletions

2
go.mod
View File

@@ -5,7 +5,7 @@ go 1.13
require ( require (
github.com/Jeffail/gabs v1.2.0 github.com/Jeffail/gabs v1.2.0
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d // indirect
github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742 github.com/brancz/gojsontoyaml v0.0.0-20200602132005-3697ded27e8c
github.com/campoy/embedmd v1.0.0 github.com/campoy/embedmd v1.0.0
github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f github.com/google/go-jsonnet v0.16.1-0.20200703153429-aaf50f5b655f
github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d // indirect github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d // indirect

4
go.sum
View File

@@ -13,6 +13,8 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742 h1:PdvQdwUXiFnSmWsOJcBXLpyH3mJfP2FMPTT3J0i7+8o= github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742 h1:PdvQdwUXiFnSmWsOJcBXLpyH3mJfP2FMPTT3J0i7+8o=
github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742/go.mod h1:IyUJYN1gvWjtLF5ZuygmxbnsAyP3aJS6cHzIuZY50B0= github.com/brancz/gojsontoyaml v0.0.0-20191212081931-bf2969bbd742/go.mod h1:IyUJYN1gvWjtLF5ZuygmxbnsAyP3aJS6cHzIuZY50B0=
github.com/brancz/gojsontoyaml v0.0.0-20200602132005-3697ded27e8c h1:hb6WqfcKQZlNx/vahy51SaIvKnoXD5609Nm0PC4msEM=
github.com/brancz/gojsontoyaml v0.0.0-20200602132005-3697ded27e8c/go.mod h1:+00lOjYXPgMfxHVPvg9GDtc3BX5Xh5aFpB4gMB8gfMo=
github.com/campoy/embedmd v1.0.0 h1:V4kI2qTJJLf4J29RzI/MAt2c3Bl4dQSYPuflzwFH2hY= github.com/campoy/embedmd v1.0.0 h1:V4kI2qTJJLf4J29RzI/MAt2c3Bl4dQSYPuflzwFH2hY=
github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8= github.com/campoy/embedmd v1.0.0/go.mod h1:oxyr9RCiSXg0M3VJ3ks0UGfp98BpSSGr0kpiX3MzVl8=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@@ -168,6 +170,8 @@ gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c= gopkg.in/yaml.v2 v2.2.5 h1:ymVxjfMaHvXD8RqPRmzHHsB3VvucivSkIAvJFDI5O3c=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab h1:DG9A67baNpoeweOy2spF1OWHhnVY5KR7/Ek/+U1lVZc= k8s.io/api v0.0.0-20190313235455-40a48860b5ab h1:DG9A67baNpoeweOy2spF1OWHhnVY5KR7/Ek/+U1lVZc=
k8s.io/api v0.0.0-20190313235455-40a48860b5ab/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA= k8s.io/api v0.0.0-20190313235455-40a48860b5ab/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA=
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 h1:IS7K02iBkQXpCeieSiyJjGoLSdVOv2DbPaWHJ+ZtgKg= k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 h1:IS7K02iBkQXpCeieSiyJjGoLSdVOv2DbPaWHJ+ZtgKg=

View File

@@ -18,8 +18,8 @@
"subdir": "Documentation/etcd-mixin" "subdir": "Documentation/etcd-mixin"
} }
}, },
"version": "205a656cc58b32ab701c74297ce440bd517a680f", "version": "ab4cc3caef3d6a1bb7c8c9e79749357eafef42df",
"sum": "NhOkJWkO7ZO2DSE8Fvipcs7Hh2/GOCS0WjPPZU8OiaQ=" "sum": "5awm+ZMs5J/nOFB+hLAb7hdeQxz/iIrls5hEZoEkXjM="
}, },
{ {
"source": { "source": {
@@ -28,8 +28,8 @@
"subdir": "grafonnet" "subdir": "grafonnet"
} }
}, },
"version": "cc1626a1b4dee45c99b78ddd9714dfd5f5d7816e", "version": "4ae0ba995612d3fe71bb74ec23a49815b6896817",
"sum": "nkgrtMYPCq/YB4r3mKyToepaLhicwWnxDdGIodPpzz0=" "sum": "J5CaYYEP02FleLx34/qgS+ckWUaxUaADlfnV7CriSo8="
}, },
{ {
"source": { "source": {
@@ -38,7 +38,7 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "ecc83f569bbf56f2863e9c3cefa4487e3253aaa6", "version": "b8cb0881befce313a6741489c6018662be663d5e",
"sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc=" "sum": "mD0zEP9FVFXeag7EaeS5OvUr2A9D6DQhGemoNn6+PLc="
}, },
{ {
@@ -79,7 +79,7 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "2ebe4dffc52ee2d1ca5b6444be54bcf0dba5372a", "version": "392f9249f808041612a8d3c5348fc48b64cece74",
"sum": "WJGwddC7KJnEN7CWGELiOHKam+vmv9XYkwrMCwmXi2M=" "sum": "WJGwddC7KJnEN7CWGELiOHKam+vmv9XYkwrMCwmXi2M="
}, },
{ {
@@ -89,7 +89,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
"version": "2ebe4dffc52ee2d1ca5b6444be54bcf0dba5372a", "version": "392f9249f808041612a8d3c5348fc48b64cece74",
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo=" "sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
}, },
{ {
@@ -99,7 +99,7 @@
"subdir": "jsonnet/mixin" "subdir": "jsonnet/mixin"
} }
}, },
"version": "d7befd55020b3c040bea42fdb5b13d9f9d96474c", "version": "fbd01683839aa408b31fa15fa1aa91c68f13d7ef",
"sum": "vqz67twCROf5kVgo/61luBOx25Mk7Okbt8YP+/7xjT0=" "sum": "vqz67twCROf5kVgo/61luBOx25Mk7Okbt8YP+/7xjT0="
}, },
{ {
@@ -109,7 +109,7 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "a1eb8fd04f3cd2bfb11aee8943e0b3469ff2f199", "version": "cd331ce9bb58bb926e391c6ae807621cb12cc29e",
"sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk=" "sum": "nM1eDP5vftqAeQSmVYzSBAh+lG0SN6zu46QiocQiVhk="
}, },
{ {
@@ -119,8 +119,8 @@
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
"version": "ff2ff3410f4ea8195e51f5fb8d84151684f91b3f", "version": "a3aaf63bb1262aada3cd9ca1fe59ffc3ea32e9e2",
"sum": "znDrZiHvvascm7Xuj3lTASIOfwX4Vmx7PELmKKw4YiI=" "sum": "zpo4/qvCbAmfJXGjwrdzXcfsvg4fxZD6wi5af/kt+8g="
}, },
{ {
"source": { "source": {

View File

@@ -24367,7 +24367,7 @@ items:
"tableColumn": "", "tableColumn": "",
"targets": [ "targets": [
{ {
"expr": "100 -\n(\n node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"}\n/\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"}\n* 100\n)\n", "expr": "100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\"})\n/\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\"})\n* 100\n)\n",
"format": "time_series", "format": "time_series",
"intervalFactor": 2, "intervalFactor": 2,
"legendFormat": "", "legendFormat": "",

View File

@@ -762,21 +762,17 @@ spec:
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- name: kube-prometheus-node-recording.rules - name: kube-prometheus-node-recording.rules
rules: rules:
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[3m])) BY (instance)
(instance)
record: instance:node_cpu:rate:sum record: instance:node_cpu:rate:sum
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) - expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance)
record: instance:node_network_receive_bytes:rate:sum record: instance:node_network_receive_bytes:rate:sum
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance)
record: instance:node_network_transmit_bytes:rate:sum record: instance:node_network_transmit_bytes:rate:sum
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY (instance, cpu)) BY (instance)
(cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total)
BY (instance, cpu)) BY (instance)
record: instance:node_cpu:ratio record: instance:node_cpu:ratio
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m])) - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait"}[5m]))
record: cluster:node_cpu:sum_rate5m record: cluster:node_cpu:sum_rate5m
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) - expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) BY (instance, cpu))
BY (instance, cpu))
record: cluster:node_cpu:ratio record: cluster:node_cpu:ratio
- name: kube-prometheus-general.rules - name: kube-prometheus-general.rules
rules: rules:
@@ -788,9 +784,7 @@ spec:
rules: rules:
- alert: KubeStateMetricsListErrors - alert: KubeStateMetricsListErrors
annotations: annotations:
description: kube-state-metrics is experiencing errors at an elevated rate description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
in list operations. This is likely causing it to not be able to expose metrics
about Kubernetes objects correctly or at all.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
summary: kube-state-metrics is experiencing errors in list operations. summary: kube-state-metrics is experiencing errors in list operations.
expr: | expr: |
@@ -803,9 +797,7 @@ spec:
severity: critical severity: critical
- alert: KubeStateMetricsWatchErrors - alert: KubeStateMetricsWatchErrors
annotations: annotations:
description: kube-state-metrics is experiencing errors at an elevated rate description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all.
in watch operations. This is likely causing it to not be able to expose
metrics about Kubernetes objects correctly or at all.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
summary: kube-state-metrics is experiencing errors in watch operations. summary: kube-state-metrics is experiencing errors in watch operations.
expr: | expr: |
@@ -820,9 +812,7 @@ spec:
rules: rules:
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
has only {{ printf "%.2f" $value }}% available space left and is filling
up.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 24 hours. summary: Filesystem is predicted to run out of space within the next 24 hours.
expr: | expr: |
@@ -838,9 +828,7 @@ spec:
severity: warning severity: warning
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
has only {{ printf "%.2f" $value }}% available space left and is filling
up fast.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
summary: Filesystem is predicted to run out of space within the next 4 hours. summary: Filesystem is predicted to run out of space within the next 4 hours.
expr: | expr: |
@@ -856,8 +844,7 @@ spec:
severity: critical severity: critical
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 5% space left. summary: Filesystem has less than 5% space left.
expr: | expr: |
@@ -871,8 +858,7 @@ spec:
severity: warning severity: warning
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
has only {{ printf "%.2f" $value }}% available space left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
summary: Filesystem has less than 3% space left. summary: Filesystem has less than 3% space left.
expr: | expr: |
@@ -886,9 +872,7 @@ spec:
severity: critical severity: critical
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
has only {{ printf "%.2f" $value }}% available inodes left and is filling
up.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 24 hours. summary: Filesystem is predicted to run out of inodes within the next 24 hours.
expr: | expr: |
@@ -904,9 +888,7 @@ spec:
severity: warning severity: warning
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
has only {{ printf "%.2f" $value }}% available inodes left and is filling
up fast.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
summary: Filesystem is predicted to run out of inodes within the next 4 hours. summary: Filesystem is predicted to run out of inodes within the next 4 hours.
expr: | expr: |
@@ -922,8 +904,7 @@ spec:
severity: critical severity: critical
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 5% inodes left. summary: Filesystem has less than 5% inodes left.
expr: | expr: |
@@ -937,8 +918,7 @@ spec:
severity: warning severity: warning
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
has only {{ printf "%.2f" $value }}% available inodes left.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
summary: Filesystem has less than 3% inodes left. summary: Filesystem has less than 3% inodes left.
expr: | expr: |
@@ -952,8 +932,7 @@ spec:
severity: critical severity: critical
- alert: NodeNetworkReceiveErrs - alert: NodeNetworkReceiveErrs
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors. summary: Network interface is reporting many receive errors.
expr: | expr: |
@@ -963,8 +942,7 @@ spec:
severity: warning severity: warning
- alert: NodeNetworkTransmitErrs - alert: NodeNetworkTransmitErrs
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors. summary: Network interface is reporting many transmit errors.
expr: | expr: |
@@ -992,8 +970,7 @@ spec:
severity: warning severity: warning
- alert: NodeClockSkewDetected - alert: NodeClockSkewDetected
annotations: annotations:
message: Clock on {{ $labels.instance }} is out of sync by more than 300s. message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
Ensure NTP is configured correctly on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
summary: Clock skew detected. summary: Clock skew detected.
expr: | expr: |
@@ -1013,8 +990,7 @@ spec:
severity: warning severity: warning
- alert: NodeClockNotSynchronising - alert: NodeClockNotSynchronising
annotations: annotations:
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
is configured on this host.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
summary: Clock not synchronising. summary: Clock not synchronising.
expr: | expr: |
@@ -1024,9 +1000,7 @@ spec:
severity: warning severity: warning
- alert: NodeRAIDDegraded - alert: NodeRAIDDegraded
annotations: annotations:
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded
summary: RAID Array is degraded summary: RAID Array is degraded
expr: | expr: |
@@ -1036,8 +1010,7 @@ spec:
severity: critical severity: critical
- alert: NodeRAIDDiskFailure - alert: NodeRAIDDiskFailure
annotations: annotations:
description: At least one device in RAID array on {{ $labels.instance }} failed. description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure
summary: Failed device in RAID array summary: Failed device in RAID array
expr: | expr: |
@@ -1048,8 +1021,7 @@ spec:
rules: rules:
- alert: PrometheusOperatorListErrors - alert: PrometheusOperatorListErrors
annotations: annotations:
description: Errors while performing List operations in controller {{$labels.controller}} description: Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors
summary: Errors while performing list operations in controller. summary: Errors while performing list operations in controller.
expr: | expr: |
@@ -1059,8 +1031,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusOperatorWatchErrors - alert: PrometheusOperatorWatchErrors
annotations: annotations:
description: Errors while performing watch operations in controller {{$labels.controller}} description: Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller. summary: Errors while performing watch operations in controller.
expr: | expr: |
@@ -1070,9 +1041,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusOperatorReconcileErrors - alert: PrometheusOperatorReconcileErrors
annotations: annotations:
description: '{{ $value | humanizePercentage }} of reconciling operations description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.'
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
namespace.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors
summary: Errors while reconciling controller. summary: Errors while reconciling controller.
expr: | expr: |
@@ -1082,8 +1051,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusOperatorNodeLookupErrors - alert: PrometheusOperatorNodeLookupErrors
annotations: annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }} description: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
Namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors
summary: Errors while reconciling Prometheus. summary: Errors while reconciling Prometheus.
expr: | expr: |
@@ -1095,8 +1063,7 @@ spec:
rules: rules:
- alert: KubePodCrashLooping - alert: KubePodCrashLooping
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
}}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodcrashlooping
summary: Pod is crash looping. summary: Pod is crash looping.
expr: | expr: |
@@ -1106,8 +1073,7 @@ spec:
severity: warning severity: warning
- alert: KubePodNotReady - alert: KubePodNotReady
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready state for longer than 15 minutes.
state for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes. summary: Pod has been in a non-ready state for more than 15 minutes.
expr: | expr: |
@@ -1123,9 +1089,7 @@ spec:
severity: warning severity: warning
- alert: KubeDeploymentGenerationMismatch - alert: KubeDeploymentGenerationMismatch
annotations: annotations:
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment }} does not match, this indicates that the Deployment has failed but has not been rolled back.
}} does not match, this indicates that the Deployment has failed but has
not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentgenerationmismatch
summary: Deployment generation mismatch due to possible roll-back summary: Deployment generation mismatch due to possible roll-back
expr: | expr: |
@@ -1137,8 +1101,7 @@ spec:
severity: warning severity: warning
- alert: KubeDeploymentReplicasMismatch - alert: KubeDeploymentReplicasMismatch
annotations: annotations:
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has not matched the expected number of replicas for longer than 15 minutes.
not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas. summary: Deployment has not matched the expected number of replicas.
expr: | expr: |
@@ -1156,8 +1119,7 @@ spec:
severity: warning severity: warning
- alert: KubeStatefulSetReplicasMismatch - alert: KubeStatefulSetReplicasMismatch
annotations: annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has not matched the expected number of replicas for longer than 15 minutes.
has not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch
summary: Deployment has not matched the expected number of replicas. summary: Deployment has not matched the expected number of replicas.
expr: | expr: |
@@ -1175,9 +1137,7 @@ spec:
severity: warning severity: warning
- alert: KubeStatefulSetGenerationMismatch - alert: KubeStatefulSetGenerationMismatch
annotations: annotations:
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset }} does not match, this indicates that the StatefulSet has failed but has not been rolled back.
}} does not match, this indicates that the StatefulSet has failed but has
not been rolled back.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetgenerationmismatch
summary: StatefulSet generation mismatch due to possible roll-back summary: StatefulSet generation mismatch due to possible roll-back
expr: | expr: |
@@ -1189,8 +1149,7 @@ spec:
severity: warning severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut - alert: KubeStatefulSetUpdateNotRolledOut
annotations: annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update has not been rolled out.
update has not been rolled out.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out. summary: StatefulSet update has not been rolled out.
expr: | expr: |
@@ -1216,8 +1175,7 @@ spec:
severity: warning severity: warning
- alert: KubeDaemonSetRolloutStuck - alert: KubeDaemonSetRolloutStuck
annotations: annotations:
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not finished or progressed for at least 15 minutes.
not finished or progressed for at least 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck. summary: DaemonSet rollout is stuck.
expr: | expr: |
@@ -1249,8 +1207,7 @@ spec:
severity: warning severity: warning
- alert: KubeContainerWaiting - alert: KubeContainerWaiting
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} description: Pod {{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container}} has been in waiting state for longer than 1 hour.
has been in waiting state for longer than 1 hour.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontainerwaiting
summary: Pod container waiting longer than 1 hour summary: Pod container waiting longer than 1 hour
expr: | expr: |
@@ -1260,8 +1217,7 @@ spec:
severity: warning severity: warning
- alert: KubeDaemonSetNotScheduled - alert: KubeDaemonSetNotScheduled
annotations: annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are not scheduled.'
}} are not scheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled. summary: DaemonSet pods are not scheduled.
expr: | expr: |
@@ -1273,8 +1229,7 @@ spec:
severity: warning severity: warning
- alert: KubeDaemonSetMisScheduled - alert: KubeDaemonSetMisScheduled
annotations: annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} are running where they are not supposed to run.'
}} are running where they are not supposed to run.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled. summary: DaemonSet pods are misscheduled.
expr: | expr: |
@@ -1284,8 +1239,7 @@ spec:
severity: warning severity: warning
- alert: KubeJobCompletion - alert: KubeJobCompletion
annotations: annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more than 12 hours to complete.
more than 12 hours to complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobcompletion
summary: Job did not complete in time summary: Job did not complete in time
expr: | expr: |
@@ -1295,8 +1249,7 @@ spec:
severity: warning severity: warning
- alert: KubeJobFailed - alert: KubeJobFailed
annotations: annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
complete.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubejobfailed
summary: Job failed to complete. summary: Job failed to complete.
expr: | expr: |
@@ -1306,8 +1259,7 @@ spec:
severity: warning severity: warning
- alert: KubeHpaReplicasMismatch - alert: KubeHpaReplicasMismatch
annotations: annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has not matched the desired number of replicas for longer than 15 minutes.
the desired number of replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas. summary: HPA has not matched desired number of replicas.
expr: | expr: |
@@ -1321,8 +1273,7 @@ spec:
severity: warning severity: warning
- alert: KubeHpaMaxedOut - alert: KubeHpaMaxedOut
annotations: annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running description: HPA {{ $labels.namespace }}/{{ $labels.hpa }} has been running at max replicas for longer than 15 minutes.
at max replicas for longer than 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubehpamaxedout
summary: HPA is running at max replicas summary: HPA is running at max replicas
expr: | expr: |
@@ -1336,8 +1287,7 @@ spec:
rules: rules:
- alert: KubeCPUOvercommit - alert: KubeCPUOvercommit
annotations: annotations:
description: Cluster has overcommitted CPU resource requests for Pods and description: Cluster has overcommitted CPU resource requests for Pods and cannot tolerate node failure.
cannot tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests. summary: Cluster has overcommitted CPU resource requests.
expr: | expr: |
@@ -1351,8 +1301,7 @@ spec:
severity: warning severity: warning
- alert: KubeMemoryOvercommit - alert: KubeMemoryOvercommit
annotations: annotations:
description: Cluster has overcommitted memory resource requests for Pods and description: Cluster has overcommitted memory resource requests for Pods and cannot tolerate node failure.
cannot tolerate node failure.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
summary: Cluster has overcommitted memory resource requests. summary: Cluster has overcommitted memory resource requests.
expr: | expr: |
@@ -1394,8 +1343,7 @@ spec:
severity: warning severity: warning
- alert: KubeQuotaAlmostFull - alert: KubeQuotaAlmostFull
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaalmostfull
summary: Namespace quota is going to be full. summary: Namespace quota is going to be full.
expr: | expr: |
@@ -1408,8 +1356,7 @@ spec:
severity: info severity: info
- alert: KubeQuotaFullyUsed - alert: KubeQuotaFullyUsed
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotafullyused
summary: Namespace quota is fully used. summary: Namespace quota is fully used.
expr: | expr: |
@@ -1422,8 +1369,7 @@ spec:
severity: info severity: info
- alert: KubeQuotaExceeded - alert: KubeQuotaExceeded
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage }} of its {{ $labels.resource }} quota.
}} of its {{ $labels.resource }} quota.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubequotaexceeded
summary: Namespace quota has exceeded the limits. summary: Namespace quota has exceeded the limits.
expr: | expr: |
@@ -1436,9 +1382,7 @@ spec:
severity: warning severity: warning
- alert: CPUThrottlingHigh - alert: CPUThrottlingHigh
annotations: annotations:
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace description: '{{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.'
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
$labels.pod }}.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-cputhrottlinghigh
summary: Processes experience elevated CPU throttling. summary: Processes experience elevated CPU throttling.
expr: | expr: |
@@ -1453,9 +1397,7 @@ spec:
rules: rules:
- alert: KubePersistentVolumeFillingUp - alert: KubePersistentVolumeFillingUp
annotations: annotations:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }} free.
}} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage
}} free.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: | expr: |
@@ -1468,10 +1410,7 @@ spec:
severity: critical severity: critical
- alert: KubePersistentVolumeFillingUp - alert: KubePersistentVolumeFillingUp
annotations: annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{ description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is expected to fill up within four days. Currently {{ $value | humanizePercentage }} is available.
$labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace }} is
expected to fill up within four days. Currently {{ $value | humanizePercentage
}} is available.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumefillingup
summary: PersistentVolume is filling up. summary: PersistentVolume is filling up.
expr: | expr: |
@@ -1487,8 +1426,7 @@ spec:
severity: warning severity: warning
- alert: KubePersistentVolumeErrors - alert: KubePersistentVolumeErrors
annotations: annotations:
description: The persistent volume {{ $labels.persistentvolume }} has status description: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
{{ $labels.phase }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubepersistentvolumeerrors
summary: PersistentVolume is having issues with provisioning. summary: PersistentVolume is having issues with provisioning.
expr: | expr: |
@@ -1500,8 +1438,7 @@ spec:
rules: rules:
- alert: KubeVersionMismatch - alert: KubeVersionMismatch
annotations: annotations:
description: There are {{ $value }} different semantic versions of Kubernetes description: There are {{ $value }} different semantic versions of Kubernetes components running.
components running.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeversionmismatch
summary: Different semantic versions of Kubernetes components running. summary: Different semantic versions of Kubernetes components running.
expr: | expr: |
@@ -1511,8 +1448,7 @@ spec:
severity: warning severity: warning
- alert: KubeClientErrors - alert: KubeClientErrors
annotations: annotations:
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.
}}' is experiencing {{ $value | humanizePercentage }} errors.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclienterrors
summary: Kubernetes API server client is experiencing errors. summary: Kubernetes API server client is experiencing errors.
expr: | expr: |
@@ -1585,8 +1521,7 @@ spec:
rules: rules:
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
description: A client certificate used to authenticate to the apiserver is description: A client certificate used to authenticate to the apiserver is expiring in less than 7.0 days.
expiring in less than 7.0 days.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
@@ -1595,8 +1530,7 @@ spec:
severity: warning severity: warning
- alert: KubeClientCertificateExpiration - alert: KubeClientCertificateExpiration
annotations: annotations:
description: A client certificate used to authenticate to the apiserver is description: A client certificate used to authenticate to the apiserver is expiring in less than 24.0 hours.
expiring in less than 24.0 hours.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeclientcertificateexpiration
summary: Client certificate is about to expire. summary: Client certificate is about to expire.
expr: | expr: |
@@ -1605,10 +1539,7 @@ spec:
severity: critical severity: critical
- alert: AggregatedAPIErrors - alert: AggregatedAPIErrors
annotations: annotations:
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors has increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
has reported errors. The number of errors has increased for it in the past
five minutes. High values indicate that the availability of the service
changes too often.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapierrors
summary: An aggregated API has reported errors. summary: An aggregated API has reported errors.
expr: | expr: |
@@ -1617,8 +1548,7 @@ spec:
severity: warning severity: warning
- alert: AggregatedAPIDown - alert: AggregatedAPIDown
annotations: annotations:
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has been only {{ $value | humanize }}% available over the last 10m.
has been only {{ $value | humanize }}% available over the last 10m.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-aggregatedapidown
summary: An aggregated API is down. summary: An aggregated API is down.
expr: | expr: |
@@ -1650,8 +1580,7 @@ spec:
severity: warning severity: warning
- alert: KubeNodeUnreachable - alert: KubeNodeUnreachable
annotations: annotations:
description: '{{ $labels.node }} is unreachable and some workloads may be description: '{{ $labels.node }} is unreachable and some workloads may be rescheduled.'
rescheduled.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodeunreachable
summary: Node is unreachable. summary: Node is unreachable.
expr: | expr: |
@@ -1661,8 +1590,7 @@ spec:
severity: warning severity: warning
- alert: KubeletTooManyPods - alert: KubeletTooManyPods
annotations: annotations:
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage }} of its Pod capacity.
}} of its Pod capacity.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubelettoomanypods
summary: Kubelet is running at capacity. summary: Kubelet is running at capacity.
expr: | expr: |
@@ -1678,8 +1606,7 @@ spec:
severity: warning severity: warning
- alert: KubeNodeReadinessFlapping - alert: KubeNodeReadinessFlapping
annotations: annotations:
description: The readiness status of node {{ $labels.node }} has changed {{ description: The readiness status of node {{ $labels.node }} has changed {{ $value }} times in the last 15 minutes.
$value }} times in the last 15 minutes.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubenodereadinessflapping
summary: Node readiness status is flapping. summary: Node readiness status is flapping.
expr: | expr: |
@@ -1689,8 +1616,7 @@ spec:
severity: warning severity: warning
- alert: KubeletPlegDurationHigh - alert: KubeletPlegDurationHigh
annotations: annotations:
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{ $value }} seconds on node {{ $labels.node }}.
duration of {{ $value }} seconds on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletplegdurationhigh
summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist.
expr: | expr: |
@@ -1700,8 +1626,7 @@ spec:
severity: warning severity: warning
- alert: KubeletPodStartUpLatencyHigh - alert: KubeletPodStartUpLatencyHigh
annotations: annotations:
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds on node {{ $labels.node }}.
on node {{ $labels.node }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high. summary: Kubelet Pod startup latency is too high.
expr: | expr: |
@@ -1711,8 +1636,7 @@ spec:
severity: warning severity: warning
- alert: KubeletClientCertificateExpiration - alert: KubeletClientCertificateExpiration
annotations: annotations:
description: Client certificate for Kubelet on node {{ $labels.node }} expires description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire. summary: Kubelet client certificate is about to expire.
expr: | expr: |
@@ -1721,8 +1645,7 @@ spec:
severity: warning severity: warning
- alert: KubeletClientCertificateExpiration - alert: KubeletClientCertificateExpiration
annotations: annotations:
description: Client certificate for Kubelet on node {{ $labels.node }} expires description: Client certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificateexpiration
summary: Kubelet client certificate is about to expire. summary: Kubelet client certificate is about to expire.
expr: | expr: |
@@ -1731,8 +1654,7 @@ spec:
severity: critical severity: critical
- alert: KubeletServerCertificateExpiration - alert: KubeletServerCertificateExpiration
annotations: annotations:
description: Server certificate for Kubelet on node {{ $labels.node }} expires description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire. summary: Kubelet server certificate is about to expire.
expr: | expr: |
@@ -1741,8 +1663,7 @@ spec:
severity: warning severity: warning
- alert: KubeletServerCertificateExpiration - alert: KubeletServerCertificateExpiration
annotations: annotations:
description: Server certificate for Kubelet on node {{ $labels.node }} expires description: Server certificate for Kubelet on node {{ $labels.node }} expires in {{ $value | humanizeDuration }}.
in {{ $value | humanizeDuration }}.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificateexpiration
summary: Kubelet server certificate is about to expire. summary: Kubelet server certificate is about to expire.
expr: | expr: |
@@ -1751,8 +1672,7 @@ spec:
severity: critical severity: critical
- alert: KubeletClientCertificateRenewalErrors - alert: KubeletClientCertificateRenewalErrors
annotations: annotations:
description: Kubelet on node {{ $labels.node }} has failed to renew its client description: Kubelet on node {{ $labels.node }} has failed to renew its client certificate ({{ $value | humanize }} errors in the last 5 minutes).
certificate ({{ $value | humanize }} errors in the last 5 minutes).
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificaterenewalerrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletclientcertificaterenewalerrors
summary: Kubelet has failed to renew its client certificate. summary: Kubelet has failed to renew its client certificate.
expr: | expr: |
@@ -1762,8 +1682,7 @@ spec:
severity: warning severity: warning
- alert: KubeletServerCertificateRenewalErrors - alert: KubeletServerCertificateRenewalErrors
annotations: annotations:
description: Kubelet on node {{ $labels.node }} has failed to renew its server description: Kubelet on node {{ $labels.node }} has failed to renew its server certificate ({{ $value | humanize }} errors in the last 5 minutes).
certificate ({{ $value | humanize }} errors in the last 5 minutes).
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubeletservercertificaterenewalerrors
summary: Kubelet has failed to renew its server certificate. summary: Kubelet has failed to renew its server certificate.
expr: | expr: |
@@ -1797,8 +1716,7 @@ spec:
rules: rules:
- alert: KubeControllerManagerDown - alert: KubeControllerManagerDown
annotations: annotations:
description: KubeControllerManager has disappeared from Prometheus target description: KubeControllerManager has disappeared from Prometheus target discovery.
discovery.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecontrollermanagerdown
summary: Target disappeared from Prometheus target discovery. summary: Target disappeared from Prometheus target discovery.
expr: | expr: |
@@ -1810,8 +1728,7 @@ spec:
rules: rules:
- alert: PrometheusBadConfig - alert: PrometheusBadConfig
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to reload its configuration.
reload its configuration.
summary: Failed Prometheus configuration reload. summary: Failed Prometheus configuration reload.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@@ -1822,10 +1739,8 @@ spec:
severity: critical severity: critical
- alert: PrometheusNotificationQueueRunningFull - alert: PrometheusNotificationQueueRunningFull
annotations: annotations:
description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} is running full.
is running full. summary: Prometheus alert notification queue predicted to run full in less than 30m.
summary: Prometheus alert notification queue predicted to run full in less
than 30m.
expr: | expr: |
# Without min_over_time, failed scrapes could create false negatives, see # Without min_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@@ -1839,10 +1754,8 @@ spec:
severity: warning severity: warning
- alert: PrometheusErrorSendingAlertsToSomeAlertmanagers - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers
annotations: annotations:
description: '{{ printf "%.1f" $value }}% errors while sending alerts from description: '{{ printf "%.1f" $value }}% errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.'
Prometheus {{$labels.namespace}}/{{$labels.pod}} to Alertmanager {{$labels.alertmanager}}.' summary: Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.
summary: Prometheus has encountered more than 1% errors sending alerts to
a specific Alertmanager.
expr: | expr: |
( (
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m]) rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
@@ -1856,8 +1769,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations: annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
expr: | expr: |
min without(alertmanager) ( min without(alertmanager) (
@@ -1872,8 +1784,7 @@ spec:
severity: critical severity: critical
- alert: PrometheusNotConnectedToAlertmanagers - alert: PrometheusNotConnectedToAlertmanagers
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected to any Alertmanagers.
to any Alertmanagers.
summary: Prometheus is not connected to any Alertmanagers. summary: Prometheus is not connected to any Alertmanagers.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@@ -1884,8 +1795,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusTSDBReloadsFailing - alert: PrometheusTSDBReloadsFailing
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} reload failures over the last 3h.
{{$value | humanize}} reload failures over the last 3h.
summary: Prometheus has issues reloading blocks from disk. summary: Prometheus has issues reloading blocks from disk.
expr: | expr: |
increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0 increase(prometheus_tsdb_reloads_failures_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
@@ -1894,8 +1804,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusTSDBCompactionsFailing - alert: PrometheusTSDBCompactionsFailing
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected {{$value | humanize}} compaction failures over the last 3h.
{{$value | humanize}} compaction failures over the last 3h.
summary: Prometheus has issues compacting blocks. summary: Prometheus has issues compacting blocks.
expr: | expr: |
increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0 increase(prometheus_tsdb_compactions_failed_total{job="prometheus-k8s",namespace="monitoring"}[3h]) > 0
@@ -1904,8 +1813,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusNotIngestingSamples - alert: PrometheusNotIngestingSamples
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting samples.
samples.
summary: Prometheus is not ingesting samples. summary: Prometheus is not ingesting samples.
expr: | expr: |
rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0 rate(prometheus_tsdb_head_samples_appended_total{job="prometheus-k8s",namespace="monitoring"}[5m]) <= 0
@@ -1914,9 +1822,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusDuplicateTimestamps - alert: PrometheusDuplicateTimestamps
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with different values but duplicated timestamp.
{{ printf "%.4g" $value }} samples/s with different values but duplicated
timestamp.
summary: Prometheus is dropping samples with duplicate timestamps. summary: Prometheus is dropping samples with duplicate timestamps.
expr: | expr: |
rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -1925,8 +1831,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusOutOfOrderTimestamps - alert: PrometheusOutOfOrderTimestamps
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping {{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
{{ printf "%.4g" $value }} samples/s with timestamps arriving out of order.
summary: Prometheus drops samples with out-of-order timestamps. summary: Prometheus drops samples with out-of-order timestamps.
expr: | expr: |
rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 rate(prometheus_target_scrapes_sample_out_of_order_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -1935,9 +1840,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusRemoteStorageFailures - alert: PrometheusRemoteStorageFailures
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to send {{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{ $labels.url }}
{{ printf "%.1f" $value }}% of the samples to {{ $labels.remote_name}}:{{
$labels.url }}
summary: Prometheus fails to send samples to remote storage. summary: Prometheus fails to send samples to remote storage.
expr: | expr: |
( (
@@ -1956,9 +1859,7 @@ spec:
severity: critical severity: critical
- alert: PrometheusRemoteWriteBehind - alert: PrometheusRemoteWriteBehind
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url }}.
is {{ printf "%.1f" $value }}s behind for {{ $labels.remote_name}}:{{ $labels.url
}}.
summary: Prometheus remote write is behind. summary: Prometheus remote write is behind.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
@@ -1974,13 +1875,8 @@ spec:
severity: critical severity: critical
- alert: PrometheusRemoteWriteDesiredShards - alert: PrometheusRemoteWriteDesiredShards
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write desired shards calculation wants to run {{ $value }} shards for queue {{ $labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{ printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}` $labels.instance | query | first | value }}.
desired shards calculation wants to run {{ $value }} shards for queue {{ summary: Prometheus remote write desired shards calculation wants to run more than configured max shards.
$labels.remote_name}}:{{ $labels.url }}, which is more than the max of {{
printf `prometheus_remote_storage_shards_max{instance="%s",job="prometheus-k8s",namespace="monitoring"}`
$labels.instance | query | first | value }}.
summary: Prometheus remote write desired shards calculation wants to run more
than configured max shards.
expr: | expr: |
# Without max_over_time, failed scrapes could create false negatives, see # Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
@@ -1994,8 +1890,7 @@ spec:
severity: warning severity: warning
- alert: PrometheusRuleFailures - alert: PrometheusRuleFailures
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed to evaluate {{ printf "%.0f" $value }} rules in the last 5m.
evaluate {{ printf "%.0f" $value }} rules in the last 5m.
summary: Prometheus is failing rule evaluations. summary: Prometheus is failing rule evaluations.
expr: | expr: |
increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 increase(prometheus_rule_evaluation_failures_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -2004,8 +1899,7 @@ spec:
severity: critical severity: critical
- alert: PrometheusMissingRuleEvaluations - alert: PrometheusMissingRuleEvaluations
annotations: annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed {{ printf "%.0f" $value }} rule group evaluations in the last 5m.
printf "%.0f" $value }} rule group evaluations in the last 5m.
summary: Prometheus is missing rule evaluations due to slow rule group evaluation. summary: Prometheus is missing rule evaluations due to slow rule group evaluation.
expr: | expr: |
increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0 increase(prometheus_rule_group_iterations_missed_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
@@ -2028,8 +1922,7 @@ spec:
severity: critical severity: critical
- alert: AlertmanagerFailedReload - alert: AlertmanagerFailedReload
annotations: annotations:
message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace message: Reloading Alertmanager's configuration has failed for {{ $labels.namespace }}/{{ $labels.pod}}.
}}/{{ $labels.pod}}.
expr: | expr: |
alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0 alertmanager_config_last_reload_successful{job="alertmanager-main",namespace="monitoring"} == 0
for: 10m for: 10m
@@ -2049,10 +1942,8 @@ spec:
rules: rules:
- alert: TargetDown - alert: TargetDown
annotations: annotations:
message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service message: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace are down.'
}} targets in {{ $labels.namespace }} namespace are down.' expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job, namespace, service)) > 10
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY (job,
namespace, service)) > 10
for: 10m for: 10m
labels: labels:
severity: warning severity: warning
@@ -2071,8 +1962,7 @@ spec:
rules: rules:
- alert: NodeNetworkInterfaceFlapping - alert: NodeNetworkInterfaceFlapping
annotations: annotations:
message: Network interface "{{ $labels.device }}" changing it's up status message: Network interface "{{ $labels.device }}" changing it's up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
expr: | expr: |
changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2
for: 2m for: 2m

View File

@@ -20,31 +20,24 @@ spec:
description: PodMonitor defines monitoring for a set of pods. description: PodMonitor defines monitoring for a set of pods.
properties: properties:
apiVersion: apiVersion:
description: 'APIVersion defines the versioned schema of this representation description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string type: string
kind: kind:
description: 'Kind is a string value representing the REST resource this description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string type: string
metadata: metadata:
type: object type: object
spec: spec:
description: Specification of desired Pod selection for target discovery description: Specification of desired Pod selection for target discovery by Prometheus.
by Prometheus.
properties: properties:
jobLabel: jobLabel:
description: The label to use to retrieve the job name from. description: The label to use to retrieve the job name from.
type: string type: string
namespaceSelector: namespaceSelector:
description: Selector to select which namespaces the Endpoints objects description: Selector to select which namespaces the Endpoints objects are discovered from.
are discovered from.
properties: properties:
any: any:
description: Boolean describing whether all namespaces are selected description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
in contrast to a list restricting them.
type: boolean type: boolean
matchNames: matchNames:
description: List of namespace names. description: List of namespace names.
@@ -55,63 +48,45 @@ spec:
podMetricsEndpoints: podMetricsEndpoints:
description: A list of endpoints allowed as part of this PodMonitor. description: A list of endpoints allowed as part of this PodMonitor.
items: items:
description: PodMetricsEndpoint defines a scrapeable endpoint of description: PodMetricsEndpoint defines a scrapeable endpoint of a Kubernetes Pod serving Prometheus metrics.
a Kubernetes Pod serving Prometheus metrics.
properties: properties:
honorLabels: honorLabels:
description: HonorLabels chooses the metric's labels on collisions description: HonorLabels chooses the metric's labels on collisions with target labels.
with target labels.
type: boolean type: boolean
honorTimestamps: honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
the timestamps present in scraped data.
type: boolean type: boolean
interval: interval:
description: Interval at which metrics should be scraped description: Interval at which metrics should be scraped
type: string type: string
metricRelabelings: metricRelabelings:
description: MetricRelabelConfigs to apply to samples before description: MetricRelabelConfigs to apply to samples before ingestion.
ingestion.
items: items:
description: 'RelabelConfig allows dynamic rewriting of the description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties: properties:
action: action:
description: Action to perform based on regex matching. description: Action to perform based on regex matching. Default is 'replace'
Default is 'replace'
type: string type: string
modulus: modulus:
description: Modulus to take of the hash of the source description: Modulus to take of the hash of the source label values.
label values.
format: int64 format: int64
type: integer type: integer
regex: regex:
description: Regular expression against which the extracted description: Regular expression against which the extracted value is matched. Default is '(.*)'
value is matched. Default is '(.*)'
type: string type: string
replacement: replacement:
description: Replacement value against which a regex replace description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string type: string
separator: separator:
description: Separator placed between concatenated source description: Separator placed between concatenated source label values. default is ';'.
label values. default is ';'.
type: string type: string
sourceLabels: sourceLabels:
description: The source labels select values from existing description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items: items:
type: string type: string
type: array type: array
targetLabel: targetLabel:
description: Label to which the resulting value is written description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string type: string
type: object type: object
type: array type: array
@@ -126,56 +101,39 @@ spec:
description: HTTP path to scrape for metrics. description: HTTP path to scrape for metrics.
type: string type: string
port: port:
description: Name of the pod port this endpoint refers to. Mutually description: Name of the pod port this endpoint refers to. Mutually exclusive with targetPort.
exclusive with targetPort.
type: string type: string
proxyUrl: proxyUrl:
description: ProxyURL eg http://proxyserver:2195 Directs scrapes description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
to proxy through this endpoint.
type: string type: string
relabelings: relabelings:
description: 'RelabelConfigs to apply to samples before ingestion. description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items: items:
description: 'RelabelConfig allows dynamic rewriting of the description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties: properties:
action: action:
description: Action to perform based on regex matching. description: Action to perform based on regex matching. Default is 'replace'
Default is 'replace'
type: string type: string
modulus: modulus:
description: Modulus to take of the hash of the source description: Modulus to take of the hash of the source label values.
label values.
format: int64 format: int64
type: integer type: integer
regex: regex:
description: Regular expression against which the extracted description: Regular expression against which the extracted value is matched. Default is '(.*)'
value is matched. Default is '(.*)'
type: string type: string
replacement: replacement:
description: Replacement value against which a regex replace description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string type: string
separator: separator:
description: Separator placed between concatenated source description: Separator placed between concatenated source label values. default is ';'.
label values. default is ';'.
type: string type: string
sourceLabels: sourceLabels:
description: The source labels select values from existing description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items: items:
type: string type: string
type: array type: array
targetLabel: targetLabel:
description: Label to which the resulting value is written description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string type: string
type: object type: object
type: array type: array
@@ -194,42 +152,30 @@ spec:
type: object type: object
type: array type: array
podTargetLabels: podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
onto the target.
items: items:
type: string type: string
type: array type: array
sampleLimit: sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
samples that will be accepted.
format: int64 format: int64
type: integer type: integer
selector: selector:
description: Selector to select Pod objects. description: Selector to select Pod objects.
properties: properties:
matchExpressions: matchExpressions:
description: matchExpressions is a list of label selector requirements. description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
The requirements are ANDed.
items: items:
description: A label selector requirement is a selector that description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
contains values, a key, and an operator that relates the key
and values.
properties: properties:
key: key:
description: key is the label key that the selector applies description: key is the label key that the selector applies to.
to.
type: string type: string
operator: operator:
description: operator represents a key's relationship to description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string type: string
values: values:
description: values is an array of string values. If the description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items: items:
type: string type: string
type: array type: array
@@ -241,11 +187,7 @@ spec:
matchLabels: matchLabels:
additionalProperties: additionalProperties:
type: string type: string
description: matchLabels is a map of {key,value} pairs. A single description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object type: object
type: object type: object
required: required:

View File

@@ -20,37 +20,27 @@ spec:
description: Probe defines monitoring for a set of static targets or ingresses. description: Probe defines monitoring for a set of static targets or ingresses.
properties: properties:
apiVersion: apiVersion:
description: 'APIVersion defines the versioned schema of this representation description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string type: string
kind: kind:
description: 'Kind is a string value representing the REST resource this description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string type: string
metadata: metadata:
type: object type: object
spec: spec:
description: Specification of desired Ingress selection for target discovery description: Specification of desired Ingress selection for target discovery by Prometheus.
by Prometheus.
properties: properties:
interval: interval:
description: Interval at which targets are probed using the configured description: Interval at which targets are probed using the configured prober. If not specified Prometheus' global scrape interval is used.
prober. If not specified Prometheus' global scrape interval is used.
type: string type: string
jobName: jobName:
description: The job name assigned to scraped metrics by default. description: The job name assigned to scraped metrics by default.
type: string type: string
module: module:
description: 'The module to use for probing specifying how to probe description: 'The module to use for probing specifying how to probe the target. Example module configuring in the blackbox exporter: https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
the target. Example module configuring in the blackbox exporter:
https://github.com/prometheus/blackbox_exporter/blob/master/example.yml'
type: string type: string
prober: prober:
description: Specification for the prober to use for probing targets. description: Specification for the prober to use for probing targets. The prober.URL parameter is required. Targets cannot be probed if left empty.
The prober.URL parameter is required. Targets cannot be probed if
left empty.
properties: properties:
path: path:
description: Path to collect metrics from. Defaults to `/probe`. description: Path to collect metrics from. Defaults to `/probe`.
@@ -68,19 +58,16 @@ spec:
description: Timeout for scraping metrics from the Prometheus exporter. description: Timeout for scraping metrics from the Prometheus exporter.
type: string type: string
targets: targets:
description: Targets defines a set of static and/or dynamically discovered description: Targets defines a set of static and/or dynamically discovered targets to be probed using the prober.
targets to be probed using the prober.
properties: properties:
ingress: ingress:
description: Ingress defines the set of dynamically discovered description: Ingress defines the set of dynamically discovered ingress objects which hosts are considered for probing.
ingress objects which hosts are considered for probing.
properties: properties:
namespaceSelector: namespaceSelector:
description: Select Ingress objects by namespace. description: Select Ingress objects by namespace.
properties: properties:
any: any:
description: Boolean describing whether all namespaces description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
are selected in contrast to a list restricting them.
type: boolean type: boolean
matchNames: matchNames:
description: List of namespace names. description: List of namespace names.
@@ -89,48 +76,33 @@ spec:
type: array type: array
type: object type: object
relabelingConfigs: relabelingConfigs:
description: 'RelabelConfigs to apply to samples before ingestion. description: 'RelabelConfigs to apply to samples before ingestion. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items: items:
description: 'RelabelConfig allows dynamic rewriting of description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
the label set, being applied to samples before ingestion.
It defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties: properties:
action: action:
description: Action to perform based on regex matching. description: Action to perform based on regex matching. Default is 'replace'
Default is 'replace'
type: string type: string
modulus: modulus:
description: Modulus to take of the hash of the source description: Modulus to take of the hash of the source label values.
label values.
format: int64 format: int64
type: integer type: integer
regex: regex:
description: Regular expression against which the extracted description: Regular expression against which the extracted value is matched. Default is '(.*)'
value is matched. Default is '(.*)'
type: string type: string
replacement: replacement:
description: Replacement value against which a regex description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
replace is performed if the regular expression matches.
Regex capture groups are available. Default is '$1'
type: string type: string
separator: separator:
description: Separator placed between concatenated source description: Separator placed between concatenated source label values. default is ';'.
label values. default is ';'.
type: string type: string
sourceLabels: sourceLabels:
description: The source labels select values from existing description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items: items:
type: string type: string
type: array type: array
targetLabel: targetLabel:
description: Label to which the resulting value is written description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string type: string
type: object type: object
type: array type: array
@@ -138,29 +110,18 @@ spec:
description: Select Ingress objects by labels. description: Select Ingress objects by labels.
properties: properties:
matchExpressions: matchExpressions:
description: matchExpressions is a list of label selector description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
requirements. The requirements are ANDed.
items: items:
description: A label selector requirement is a selector description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
that contains values, a key, and an operator that
relates the key and values.
properties: properties:
key: key:
description: key is the label key that the selector description: key is the label key that the selector applies to.
applies to.
type: string type: string
operator: operator:
description: operator represents a key's relationship description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
to a set of values. Valid operators are In, NotIn,
Exists and DoesNotExist.
type: string type: string
values: values:
description: values is an array of string values. description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
If the operator is In or NotIn, the values array
must be non-empty. If the operator is Exists or
DoesNotExist, the values array must be empty.
This array is replaced during a strategic merge
patch.
items: items:
type: string type: string
type: array type: array
@@ -172,27 +133,20 @@ spec:
matchLabels: matchLabels:
additionalProperties: additionalProperties:
type: string type: string
description: matchLabels is a map of {key,value} pairs. description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
A single {key,value} in the matchLabels map is equivalent
to an element of matchExpressions, whose key field is
"key", the operator is "In", and the values array contains
only "value". The requirements are ANDed.
type: object type: object
type: object type: object
type: object type: object
staticConfig: staticConfig:
description: 'StaticConfig defines static targets which are considers description: 'StaticConfig defines static targets which are considers for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
for probing. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#static_config.'
properties: properties:
labels: labels:
additionalProperties: additionalProperties:
type: string type: string
description: Labels assigned to all metrics scraped from the description: Labels assigned to all metrics scraped from the targets.
targets.
type: object type: object
static: static:
description: Targets is a list of URLs to probe using the description: Targets is a list of URLs to probe using the configured prober.
configured prober.
items: items:
type: string type: string
type: array type: array

View File

@@ -20,14 +20,10 @@ spec:
description: PrometheusRule defines alerting rules for a Prometheus instance description: PrometheusRule defines alerting rules for a Prometheus instance
properties: properties:
apiVersion: apiVersion:
description: 'APIVersion defines the versioned schema of this representation description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string type: string
kind: kind:
description: 'Kind is a string value representing the REST resource this description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string type: string
metadata: metadata:
type: object type: object
@@ -37,10 +33,7 @@ spec:
groups: groups:
description: Content of Prometheus rule file description: Content of Prometheus rule file
items: items:
description: 'RuleGroup is a list of sequentially evaluated recording description: 'RuleGroup is a list of sequentially evaluated recording and alerting rules. Note: PartialResponseStrategy is only used by ThanosRuler and will be ignored by Prometheus instances. Valid values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
and alerting rules. Note: PartialResponseStrategy is only used
by ThanosRuler and will be ignored by Prometheus instances. Valid
values for this field are ''warn'' or ''abort''. More info: https://github.com/thanos-io/thanos/blob/master/docs/components/rule.md#partial-response'
properties: properties:
interval: interval:
type: string type: string

View File

@@ -20,65 +20,50 @@ spec:
description: ServiceMonitor defines monitoring for a set of services. description: ServiceMonitor defines monitoring for a set of services.
properties: properties:
apiVersion: apiVersion:
description: 'APIVersion defines the versioned schema of this representation description: 'APIVersion defines the versioned schema of this representation of an object. Servers should convert recognized schemas to the latest internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string type: string
kind: kind:
description: 'Kind is a string value representing the REST resource this description: 'Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string type: string
metadata: metadata:
type: object type: object
spec: spec:
description: Specification of desired Service selection for target discovery description: Specification of desired Service selection for target discovery by Prometheus.
by Prometheus.
properties: properties:
endpoints: endpoints:
description: A list of endpoints allowed as part of this ServiceMonitor. description: A list of endpoints allowed as part of this ServiceMonitor.
items: items:
description: Endpoint defines a scrapeable endpoint serving Prometheus description: Endpoint defines a scrapeable endpoint serving Prometheus metrics.
metrics.
properties: properties:
basicAuth: basicAuth:
description: 'BasicAuth allow an endpoint to authenticate over description: 'BasicAuth allow an endpoint to authenticate over basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
basic authentication More info: https://prometheus.io/docs/operating/configuration/#endpoints'
properties: properties:
password: password:
description: The secret in the service monitor namespace description: The secret in the service monitor namespace that contains the password for authentication.
that contains the password for authentication.
properties: properties:
key: key:
description: The key of the secret to select from. Must description: The key of the secret to select from. Must be a valid secret key.
be a valid secret key.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string type: string
optional: optional:
description: Specify whether the Secret or its key must description: Specify whether the Secret or its key must be defined
be defined
type: boolean type: boolean
required: required:
- key - key
type: object type: object
username: username:
description: The secret in the service monitor namespace description: The secret in the service monitor namespace that contains the username for authentication.
that contains the username for authentication.
properties: properties:
key: key:
description: The key of the secret to select from. Must description: The key of the secret to select from. Must be a valid secret key.
be a valid secret key.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string type: string
optional: optional:
description: Specify whether the Secret or its key must description: Specify whether the Secret or its key must be defined
be defined
type: boolean type: boolean
required: required:
- key - key
@@ -88,79 +73,57 @@ spec:
description: File to read bearer token for scraping targets. description: File to read bearer token for scraping targets.
type: string type: string
bearerTokenSecret: bearerTokenSecret:
description: Secret to mount to read bearer token for scraping description: Secret to mount to read bearer token for scraping targets. The secret needs to be in the same namespace as the service monitor and accessible by the Prometheus Operator.
targets. The secret needs to be in the same namespace as the
service monitor and accessible by the Prometheus Operator.
properties: properties:
key: key:
description: The key of the secret to select from. Must description: The key of the secret to select from. Must be a valid secret key.
be a valid secret key.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string type: string
optional: optional:
description: Specify whether the Secret or its key must description: Specify whether the Secret or its key must be defined
be defined
type: boolean type: boolean
required: required:
- key - key
type: object type: object
honorLabels: honorLabels:
description: HonorLabels chooses the metric's labels on collisions description: HonorLabels chooses the metric's labels on collisions with target labels.
with target labels.
type: boolean type: boolean
honorTimestamps: honorTimestamps:
description: HonorTimestamps controls whether Prometheus respects description: HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data.
the timestamps present in scraped data.
type: boolean type: boolean
interval: interval:
description: Interval at which metrics should be scraped description: Interval at which metrics should be scraped
type: string type: string
metricRelabelings: metricRelabelings:
description: MetricRelabelConfigs to apply to samples before description: MetricRelabelConfigs to apply to samples before ingestion.
ingestion.
items: items:
description: 'RelabelConfig allows dynamic rewriting of the description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties: properties:
action: action:
description: Action to perform based on regex matching. description: Action to perform based on regex matching. Default is 'replace'
Default is 'replace'
type: string type: string
modulus: modulus:
description: Modulus to take of the hash of the source description: Modulus to take of the hash of the source label values.
label values.
format: int64 format: int64
type: integer type: integer
regex: regex:
description: Regular expression against which the extracted description: Regular expression against which the extracted value is matched. Default is '(.*)'
value is matched. Default is '(.*)'
type: string type: string
replacement: replacement:
description: Replacement value against which a regex replace description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string type: string
separator: separator:
description: Separator placed between concatenated source description: Separator placed between concatenated source label values. default is ';'.
label values. default is ';'.
type: string type: string
sourceLabels: sourceLabels:
description: The source labels select values from existing description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items: items:
type: string type: string
type: array type: array
targetLabel: targetLabel:
description: Label to which the resulting value is written description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string type: string
type: object type: object
type: array type: array
@@ -175,56 +138,39 @@ spec:
description: HTTP path to scrape for metrics. description: HTTP path to scrape for metrics.
type: string type: string
port: port:
description: Name of the service port this endpoint refers to. description: Name of the service port this endpoint refers to. Mutually exclusive with targetPort.
Mutually exclusive with targetPort.
type: string type: string
proxyUrl: proxyUrl:
description: ProxyURL eg http://proxyserver:2195 Directs scrapes description: ProxyURL eg http://proxyserver:2195 Directs scrapes to proxy through this endpoint.
to proxy through this endpoint.
type: string type: string
relabelings: relabelings:
description: 'RelabelConfigs to apply to samples before scraping. description: 'RelabelConfigs to apply to samples before scraping. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config'
items: items:
description: 'RelabelConfig allows dynamic rewriting of the description: 'RelabelConfig allows dynamic rewriting of the label set, being applied to samples before ingestion. It defines `<metric_relabel_configs>`-section of Prometheus configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
label set, being applied to samples before ingestion. It
defines `<metric_relabel_configs>`-section of Prometheus
configuration. More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs'
properties: properties:
action: action:
description: Action to perform based on regex matching. description: Action to perform based on regex matching. Default is 'replace'
Default is 'replace'
type: string type: string
modulus: modulus:
description: Modulus to take of the hash of the source description: Modulus to take of the hash of the source label values.
label values.
format: int64 format: int64
type: integer type: integer
regex: regex:
description: Regular expression against which the extracted description: Regular expression against which the extracted value is matched. Default is '(.*)'
value is matched. Default is '(.*)'
type: string type: string
replacement: replacement:
description: Replacement value against which a regex replace description: Replacement value against which a regex replace is performed if the regular expression matches. Regex capture groups are available. Default is '$1'
is performed if the regular expression matches. Regex
capture groups are available. Default is '$1'
type: string type: string
separator: separator:
description: Separator placed between concatenated source description: Separator placed between concatenated source label values. default is ';'.
label values. default is ';'.
type: string type: string
sourceLabels: sourceLabels:
description: The source labels select values from existing description: The source labels select values from existing labels. Their content is concatenated using the configured separator and matched against the configured regular expression for the replace, keep, and drop actions.
labels. Their content is concatenated using the configured
separator and matched against the configured regular
expression for the replace, keep, and drop actions.
items: items:
type: string type: string
type: array type: array
targetLabel: targetLabel:
description: Label to which the resulting value is written description: Label to which the resulting value is written in a replace action. It is mandatory for replace actions. Regex capture groups are available.
in a replace action. It is mandatory for replace actions.
Regex capture groups are available.
type: string type: string
type: object type: object
type: array type: array
@@ -238,32 +184,25 @@ spec:
anyOf: anyOf:
- type: integer - type: integer
- type: string - type: string
description: Name or number of the target port of the Pod behind description: Name or number of the target port of the Pod behind the Service, the port must be specified with container port property. Mutually exclusive with port.
the Service, the port must be specified with container port
property. Mutually exclusive with port.
x-kubernetes-int-or-string: true x-kubernetes-int-or-string: true
tlsConfig: tlsConfig:
description: TLS configuration to use when scraping the endpoint description: TLS configuration to use when scraping the endpoint
properties: properties:
ca: ca:
description: Stuct containing the CA cert to use for the description: Stuct containing the CA cert to use for the targets.
targets.
properties: properties:
configMap: configMap:
description: ConfigMap containing data to use for the description: ConfigMap containing data to use for the targets.
targets.
properties: properties:
key: key:
description: The key to select. description: The key to select.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string type: string
optional: optional:
description: Specify whether the ConfigMap or its description: Specify whether the ConfigMap or its key must be defined
key must be defined
type: boolean type: boolean
required: required:
- key - key
@@ -272,45 +211,35 @@ spec:
description: Secret containing data to use for the targets. description: Secret containing data to use for the targets.
properties: properties:
key: key:
description: The key of the secret to select from. Must description: The key of the secret to select from. Must be a valid secret key.
be a valid secret key.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string type: string
optional: optional:
description: Specify whether the Secret or its key description: Specify whether the Secret or its key must be defined
must be defined
type: boolean type: boolean
required: required:
- key - key
type: object type: object
type: object type: object
caFile: caFile:
description: Path to the CA cert in the Prometheus container description: Path to the CA cert in the Prometheus container to use for the targets.
to use for the targets.
type: string type: string
cert: cert:
description: Struct containing the client cert file for description: Struct containing the client cert file for the targets.
the targets.
properties: properties:
configMap: configMap:
description: ConfigMap containing data to use for the description: ConfigMap containing data to use for the targets.
targets.
properties: properties:
key: key:
description: The key to select. description: The key to select.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string type: string
optional: optional:
description: Specify whether the ConfigMap or its description: Specify whether the ConfigMap or its key must be defined
key must be defined
type: boolean type: boolean
required: required:
- key - key
@@ -319,48 +248,38 @@ spec:
description: Secret containing data to use for the targets. description: Secret containing data to use for the targets.
properties: properties:
key: key:
description: The key of the secret to select from. Must description: The key of the secret to select from. Must be a valid secret key.
be a valid secret key.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind,
uid?'
type: string type: string
optional: optional:
description: Specify whether the Secret or its key description: Specify whether the Secret or its key must be defined
must be defined
type: boolean type: boolean
required: required:
- key - key
type: object type: object
type: object type: object
certFile: certFile:
description: Path to the client cert file in the Prometheus description: Path to the client cert file in the Prometheus container for the targets.
container for the targets.
type: string type: string
insecureSkipVerify: insecureSkipVerify:
description: Disable target certificate validation. description: Disable target certificate validation.
type: boolean type: boolean
keyFile: keyFile:
description: Path to the client key file in the Prometheus description: Path to the client key file in the Prometheus container for the targets.
container for the targets.
type: string type: string
keySecret: keySecret:
description: Secret containing the client key file for the description: Secret containing the client key file for the targets.
targets.
properties: properties:
key: key:
description: The key of the secret to select from. Must description: The key of the secret to select from. Must be a valid secret key.
be a valid secret key.
type: string type: string
name: name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names TODO: Add other useful fields. apiVersion, kind, uid?'
TODO: Add other useful fields. apiVersion, kind, uid?'
type: string type: string
optional: optional:
description: Specify whether the Secret or its key must description: Specify whether the Secret or its key must be defined
be defined
type: boolean type: boolean
required: required:
- key - key
@@ -375,12 +294,10 @@ spec:
description: The label to use to retrieve the job name from. description: The label to use to retrieve the job name from.
type: string type: string
namespaceSelector: namespaceSelector:
description: Selector to select which namespaces the Endpoints objects description: Selector to select which namespaces the Endpoints objects are discovered from.
are discovered from.
properties: properties:
any: any:
description: Boolean describing whether all namespaces are selected description: Boolean describing whether all namespaces are selected in contrast to a list restricting them.
in contrast to a list restricting them.
type: boolean type: boolean
matchNames: matchNames:
description: List of namespace names. description: List of namespace names.
@@ -389,42 +306,30 @@ spec:
type: array type: array
type: object type: object
podTargetLabels: podTargetLabels:
description: PodTargetLabels transfers labels on the Kubernetes Pod description: PodTargetLabels transfers labels on the Kubernetes Pod onto the target.
onto the target.
items: items:
type: string type: string
type: array type: array
sampleLimit: sampleLimit:
description: SampleLimit defines per-scrape limit on number of scraped description: SampleLimit defines per-scrape limit on number of scraped samples that will be accepted.
samples that will be accepted.
format: int64 format: int64
type: integer type: integer
selector: selector:
description: Selector to select Endpoints objects. description: Selector to select Endpoints objects.
properties: properties:
matchExpressions: matchExpressions:
description: matchExpressions is a list of label selector requirements. description: matchExpressions is a list of label selector requirements. The requirements are ANDed.
The requirements are ANDed.
items: items:
description: A label selector requirement is a selector that description: A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
contains values, a key, and an operator that relates the key
and values.
properties: properties:
key: key:
description: key is the label key that the selector applies description: key is the label key that the selector applies to.
to.
type: string type: string
operator: operator:
description: operator represents a key's relationship to description: operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist.
a set of values. Valid operators are In, NotIn, Exists
and DoesNotExist.
type: string type: string
values: values:
description: values is an array of string values. If the description: values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
operator is In or NotIn, the values array must be non-empty.
If the operator is Exists or DoesNotExist, the values
array must be empty. This array is replaced during a strategic
merge patch.
items: items:
type: string type: string
type: array type: array
@@ -436,16 +341,11 @@ spec:
matchLabels: matchLabels:
additionalProperties: additionalProperties:
type: string type: string
description: matchLabels is a map of {key,value} pairs. A single description: matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
{key,value} in the matchLabels map is equivalent to an element
of matchExpressions, whose key field is "key", the operator
is "In", and the values array contains only "value". The requirements
are ANDed.
type: object type: object
type: object type: object
targetLabels: targetLabels:
description: TargetLabels transfers labels on the Kubernetes Service description: TargetLabels transfers labels on the Kubernetes Service onto the target.
onto the target.
items: items:
type: string type: string
type: array type: array