# Prometheus-style rule file: alerting and recording rules for the "general.rules" group.
groups:
  - name: general.rules
    rules:
      # Fires once the percentage of a job's targets with up == 0 has stayed
      # above 10 for 10 minutes. The percentage is exposed as $value.
      - alert: TargetDown
        expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10
        for: 10m
        labels:
          severity: warning
        annotations:
          description: '{{ $value }}% of {{ $labels.job }} targets are down.'
          summary: Targets are down

      # The expression is the constant vector(1) and there is no `for` clause,
      # so this alert is expected to be firing at all times. Its absence
      # downstream indicates a problem in the alerting pipeline itself.
      - alert: DeadMansSwitch
        expr: vector(1)
        labels:
          severity: none
        annotations:
          description: >-
            This is a DeadMansSwitch meant to ensure that the entire Alerting
            pipeline is functional.
          summary: Alerting DeadMansSwitch

      # Recording rule: ratio of open file descriptors to the process limit.
      # A value of 1 means the limit has been reached.
      - record: fd_utilization
        expr: process_open_fds / process_max_fds

      # Warning tier: extrapolates the last 1h of fd_utilization 4 hours ahead
      # (3600 * 4 seconds) and fires when the projection exceeds 1 for 10m.
      - alert: FdExhaustionClose
        expr: predict_linear(fd_utilization[1h], 3600 * 4) > 1
        for: 10m
        labels:
          severity: warning
        annotations:
          description: >-
            {{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }}
            instance will exhaust in file/socket descriptors within the next
            4 hours
          summary: file descriptors soon exhausted

      # Critical tier: same alert name, but extrapolates the last 10m of
      # fd_utilization 1 hour ahead (3600 seconds); the two tiers are
      # distinguished by their severity label.
      - alert: FdExhaustionClose
        expr: predict_linear(fd_utilization[10m], 3600) > 1
        for: 10m
        labels:
          severity: critical
        annotations:
          description: >-
            {{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }}
            instance will exhaust in file/socket descriptors within the next
            hour
          summary: file descriptors soon exhausted