kube-prometheus: Add clock skew and node network interface alerts
This commit is contained in:
@@ -32,6 +32,81 @@
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'node-time',
|
||||||
|
rules: [
|
||||||
|
{
|
||||||
|
alert: 'ClockSkewDetected',
|
||||||
|
annotations: {
|
||||||
|
message: 'Clock skew detected on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}. Ensure NTP is configured correctly on this host.',
|
||||||
|
},
|
||||||
|
expr: |||
|
||||||
|
node_ntp_offset_seconds{%(nodeExporterSelector)s} < -0.03 or node_ntp_offset_seconds{%(nodeExporterSelector)s} > 0.03
|
||||||
|
||| % $._config,
|
||||||
|
'for': '2m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'node-network',
|
||||||
|
rules: [
|
||||||
|
{
|
||||||
|
alert: 'NetworkReceiveErrors',
|
||||||
|
annotations: {
|
||||||
|
message: 'Network interface "{{ $labels.device }}" showing receive errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||||
|
},
|
||||||
|
expr: |||
|
||||||
|
rate(node_network_receive_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
|
||||||
|
||| % $._config,
|
||||||
|
'for': '2m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'NetworkTransmitErrors',
|
||||||
|
annotations: {
|
||||||
|
message: 'Network interface "{{ $labels.device }}" showing transmit errors on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||||
|
},
|
||||||
|
expr: |||
|
||||||
|
rate(node_network_transmit_errs_total{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 0
|
||||||
|
||| % $._config,
|
||||||
|
'for': '2m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'NodeNetworkInterfaceDown',
|
||||||
|
annotations: {
|
||||||
|
message: 'Network interface "{{ $labels.device }}" down on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||||
|
},
|
||||||
|
expr: |||
|
||||||
|
node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s} == 0
|
||||||
|
||| % $._config,
|
||||||
|
'for': '2m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
alert: 'NodeNetworkInterfaceFlapping',
|
||||||
|
annotations: {
|
||||||
|
message: 'Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}',
|
||||||
|
},
|
||||||
|
expr: |||
|
||||||
|
changes(node_network_up{%(nodeExporterSelector)s,%(hostNetworkInterfaceSelector)s}[2m]) > 2
|
||||||
|
||| % $._config,
|
||||||
|
'for': '2m',
|
||||||
|
labels: {
|
||||||
|
severity: 'warning',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@@ -101,6 +101,7 @@ local k = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
|||||||
// Once node exporter is being released with those settings, this can be removed.
|
// Once node exporter is being released with those settings, this can be removed.
|
||||||
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)',
|
'--collector.filesystem.ignored-mount-points=^/(dev|proc|sys|var/lib/docker/.+)($|/)',
|
||||||
'--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$',
|
'--collector.filesystem.ignored-fs-types=^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$',
|
||||||
|
'--collector.ntp',
|
||||||
]) +
|
]) +
|
||||||
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
|
container.withVolumeMounts([procVolumeMount, sysVolumeMount, rootVolumeMount]) +
|
||||||
container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +
|
container.mixin.resources.withRequests({ cpu: '102m', memory: '180Mi' }) +
|
||||||
|
Reference in New Issue
Block a user