kube-prometheus: drop conntrack alerts and direct up alerts

This commit is contained in:
Frederic Branczyk
2017-06-06 15:22:28 +02:00
parent 30cbd76944
commit 0c35d73e2c
4 changed files with 24 additions and 82 deletions

View File

@@ -1,14 +1,14 @@
### Up Alerting ###
ALERT TargetDown
IF up == 0
IF 100 * (count(up == 0) / count(up)) > 3
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "target is down",
description = "A target of type {{ $labels.job }} is down."
summary = "Targets are down",
description = "{{ $value }}% of targets are down."
}
### Dead man's switch ###
@@ -25,26 +25,15 @@ ALERT DeadMansSwitch
### File descriptor alerts ###
ALERT TooManyOpenFiles
IF 100*process_open_fds / process_max_fds > 50
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "too many open file descriptors",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} is using {{ $value }}% of the available file/socket descriptors.",
}
ALERT K8STooManyOpenFiles
IF 100*process_open_fds / process_max_fds > 80
ALERT TooManyOpenFileDescriptors
IF 100 * (process_open_fds / process_max_fds) > 95
FOR 10m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "too many open file descriptors",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} is using {{ $value }}% of the available file/socket descriptors.",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) is using {{ $value }}% of the available file/socket descriptors.",
}
instance:fd_utilization = process_open_fds / process_max_fds
@@ -58,7 +47,7 @@ ALERT FdExhaustionClose
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance {{ $labels.namespace }}/{{ $labels.pod }} will exhaust in file descriptors soon",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) instance will soon exhaust its file/socket descriptors",
}
# alert if file descriptors are likely to exhaust within the next hour
@@ -70,40 +59,5 @@ ALERT FdExhaustionClose
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} instance {{ $labels.namespace }}/{{ $labels.pod }} will exhaust in file descriptors soon",
}
### Conntrack alerts ###
# To catch the conntrack sysctl de-tuning when it happens
ALERT ConntrackTuningMissing
IF node_nf_conntrack_udp_timeout > 10
FOR 10m
LABELS {
severity = "warning",
}
ANNOTATIONS {
summary = "Node does not have the correct conntrack tunings",
description = "Nodes keep un-setting the correct tunings, investigate when it happens.",
}
ALERT ConntrackTableFull
IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 50
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Number of tracked connections is near the limit",
description = "The nf_conntrack table is {{ $value }}% full.",
}
ALERT ConntrackTableFull
IF 100*node_nf_conntrack_entries / node_nf_conntrack_entries_limit > 90
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "Number of tracked connections is near the limit",
description = "The nf_conntrack table is {{ $value }}% full.",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) instance will soon exhaust its file/socket descriptors",
}