manifests: regenerate
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -72,6 +72,12 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/nodes
|
||||
name: grafana-dashboard-nodes
|
||||
readOnly: false
|
||||
@@ -141,6 +147,12 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-kubelet
|
||||
name: grafana-dashboard-kubelet
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
- configMap:
|
||||
name: grafana-dashboard-nodes
|
||||
name: grafana-dashboard-nodes
|
||||
|
@@ -26,7 +26,7 @@ spec:
|
||||
name: node-exporter
|
||||
resources:
|
||||
limits:
|
||||
cpu: 102m
|
||||
cpu: 250m
|
||||
memory: 180Mi
|
||||
requests:
|
||||
cpu: 102m
|
||||
|
@@ -8,6 +8,67 @@ metadata:
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: node-exporter.rules
|
||||
rules:
|
||||
- expr: |
|
||||
count without (cpu) (
|
||||
count without (mode) (
|
||||
node_cpu_seconds_total{job="node-exporter"}
|
||||
)
|
||||
)
|
||||
record: instance:node_num_cpu:sum
|
||||
- expr: |
|
||||
1 - avg without (cpu, mode) (
|
||||
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
|
||||
)
|
||||
record: instance:node_cpu_utilisation:rate1m
|
||||
- expr: |
|
||||
(
|
||||
node_load1{job="node-exporter"}
|
||||
/
|
||||
instance:node_num_cpu:sum{job="node-exporter"}
|
||||
)
|
||||
record: instance:node_load1_per_cpu:ratio
|
||||
- expr: |
|
||||
1 - (
|
||||
node_memory_MemAvailable_bytes{job="node-exporter"}
|
||||
/
|
||||
node_memory_MemTotal_bytes{job="node-exporter"}
|
||||
)
|
||||
record: instance:node_memory_utilisation:ratio
|
||||
- expr: |
|
||||
(
|
||||
rate(node_vmstat_pgpgin{job="node-exporter"}[1m])
|
||||
+
|
||||
rate(node_vmstat_pgpgout{job="node-exporter"}[1m])
|
||||
)
|
||||
record: instance:node_memory_swap_io_pages:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
- name: k8s.rules
|
||||
rules:
|
||||
- expr: |
|
||||
@@ -345,6 +406,162 @@ spec:
|
||||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total)
|
||||
BY (instance, cpu))
|
||||
record: cluster:node_cpu:ratio
|
||||
- name: node-exporter
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",} / node_filesystem_size_bytes{job="node-exporter",} < 0.4
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",} / node_filesystem_size_bytes{job="node-exporter",} < 0.2
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",} / node_filesystem_size_bytes{job="node-exporter",} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",} / node_filesystem_size_bytes{job="node-exporter",} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",} / node_filesystem_files{job="node-exporter",} < 0.4
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",} / node_filesystem_files{job="node-exporter",} < 0.2
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",} / node_filesystem_files{job="node-exporter",} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",} / node_filesystem_files{job="node-exporter",} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: |
|
||||
increase(node_network_receive_errs_total[2m]) > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: |
|
||||
increase(node_network_transmit_errs_total[2m]) > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kubernetes-absent
|
||||
rules:
|
||||
- alert: AlertmanagerDown
|
||||
@@ -1097,26 +1314,6 @@ spec:
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: none
|
||||
- name: kube-prometheus-node-alerting.rules
|
||||
rules:
|
||||
- alert: NodeDiskRunningFull
|
||||
annotations:
|
||||
message: Device {{ $labels.device }} on node {{ $labels.instance }} will be
|
||||
full within the next 24 hours.
|
||||
expr: |
|
||||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[6h], 3600 * 24) < 0)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeDiskRunningFull
|
||||
annotations:
|
||||
message: Device {{ $labels.device }} on node {{ $labels.instance }} will be
|
||||
full within the next 2 hours.
|
||||
expr: |
|
||||
(node:node_filesystem_usage: > 0.85) and (predict_linear(node:node_filesystem_avail:[30m], 3600 * 2) < 0)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: node-time
|
||||
rules:
|
||||
- alert: ClockSkewDetected
|
||||
@@ -1130,24 +1327,6 @@ spec:
|
||||
severity: warning
|
||||
- name: node-network
|
||||
rules:
|
||||
- alert: NetworkReceiveErrors
|
||||
annotations:
|
||||
message: Network interface "{{ $labels.device }}" showing receive errors on
|
||||
node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
expr: |
|
||||
rate(node_network_receive_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NetworkTransmitErrors
|
||||
annotations:
|
||||
message: Network interface "{{ $labels.device }}" showing transmit errors
|
||||
on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"
|
||||
expr: |
|
||||
rate(node_network_transmit_errs_total{job="node-exporter",device!~"veth.+"}[2m]) > 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeNetworkInterfaceFlapping
|
||||
annotations:
|
||||
message: Network interface "{{ $labels.device }}" changing it's up status
|
||||
|
Reference in New Issue
Block a user