kube-prometheus: Add clock skew and node network interface alerts
This commit is contained in:
@@ -32,6 +32,81 @@
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'node-time',
|
||||
rules: [
|
||||
{
|
||||
alert: 'ClockSkewDetected',
|
||||
annotations: {
|
||||
message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.',
|
||||
},
|
||||
expr: |||
|
||||
node_ntp_offset_seconds{%(nodeExporterSelector)s} < -0.03 or node_ntp_offset_seconds{%(nodeExporterSelector)s} > 0.03
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
name: 'node-network',
|
||||
rules: [
|
||||
{
|
||||
alert: 'NetworkReceiveErrors',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"',
|
||||
},
|
||||
expr: |||
|
||||
rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NetworkTransmitErrors',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"',
|
||||
},
|
||||
expr: |||
|
||||
rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NodeNetworkInterfaceDown',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" down on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"',
|
||||
},
|
||||
expr: |||
|
||||
node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s} == 0
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'NodeNetworkInterfaceFlapping',
|
||||
annotations: {
|
||||
message: 'Network interface "{{ $labels.device }}" changing it\'s up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}"',
|
||||
},
|
||||
expr: |||
|
||||
changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2
|
||||
||| % $._config,
|
||||
'for': '2m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
|
@@ -101,6 +101,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
// Once node exporter is being released with those settings, this can be removed.
|
||||
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)',
|
||||
'--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$',
|
||||
'--collector.ntp',
|
||||
]) +
|
||||
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
|
||||
container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +
|
||||
|
Reference in New Issue
Block a user