groups: - name: ./general.rules rules: - alert: TargetDown expr: 100 * (count(up == 0) BY (job) / count(up) BY (job)) > 10 for: 10m labels: severity: warning annotations: description: '{{ $value }}% or more of {{ $labels.job }} targets are down.' summary: Targets are down - alert: DeadMansSwitch expr: vector(1) labels: severity: none annotations: description: This is a DeadMansSwitch meant to ensure that the entire Alerting pipeline is functional. summary: Alerting DeadMansSwitch - alert: TooManyOpenFileDescriptors expr: 100 * (process_open_fds / process_max_fds) > 95 for: 10m labels: severity: critical annotations: description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) is using {{ $value }}% of the available file/socket descriptors.' summary: too many open file descriptors - record: instance:fd_utilization expr: process_open_fds / process_max_fds - alert: FdExhaustionClose expr: predict_linear(instance:fd_utilization[1h], 3600 * 4) > 1 for: 10m labels: severity: warning annotations: description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) instance will exhaust in file/socket descriptors soon' summary: file descriptors soon exhausted - alert: FdExhaustionClose expr: predict_linear(instance:fd_utilization[10m], 3600) > 1 for: 10m labels: severity: critical annotations: description: '{{ $labels.job }}: {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.instance }}) instance will exhaust in file/socket descriptors soon' summary: file descriptors soon exhausted