Files
kube-prometheus/assets/prometheus/rules/general.rules
2017-06-06 15:22:28 +02:00

64 lines
1.8 KiB
Plaintext

### Up Alerting ###
Alert TargetDown
IF 100 * (count(up == 0) / count(up)) > 3
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "Targets are down",
description = "More than {{ $value }}% of targets are down."
}
### Dead man's switch ###
ALERT DeadMansSwitch
IF vector(1)
LABELS {
severity = "none",
}
ANNOTATIONS {
summary = "Alerting DeadMansSwitch",
description = "This is a DeadMansSwitch meant to ensure that the entire Alerting pipeline is functional.",
}
### File descriptor alerts ###
ALERT TooManyOpenFileDescriptors
IF 100 * (process_open_fds / process_max_fds) > 95
FOR 10m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "too many open file descriptors",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) is using {{ $value }}% of the available file/socket descriptors.",
}
instance:fd_utilization = process_open_fds / process_max_fds
# alert if file descriptors are likely to exhaust within the next 4 hours
ALERT FdExhaustionClose
IF predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) instance will exhaust in file/socket descriptors soon",
}
# alert if file descriptors are likely to exhaust within the next hour
ALERT FdExhaustionClose
IF predict_linear(instance:fd_utilization[10m], 3600) > 1
FOR 10m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "file descriptors soon exhausted",
description = "{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) instance will exhaust in file/socket descriptors soon",
}