Add runbook_url annotation to all alerts

Signed-off-by: ArthurSens <arthursens2005@gmail.com>
2021-03-05 12:54:03 +00:00
parent ebd4b28b91
commit e586afb280
10 changed files with 72 additions and 13 deletions
--- a/manifests/node-exporter-prometheusRule.yaml
+++ b/manifests/node-exporter-prometheusRule.yaml
@@ -17,6 +17,7 @@ spec:
    - alert: NodeFilesystemSpaceFillingUp
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
        summary: Filesystem is predicted to run out of space within the next 24 hours.
      expr: |
        (
@@ -32,6 +33,7 @@ spec:
    - alert: NodeFilesystemSpaceFillingUp
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemspacefillingup
        summary: Filesystem is predicted to run out of space within the next 4 hours.
      expr: |
        (
@@ -47,6 +49,7 @@ spec:
    - alert: NodeFilesystemAlmostOutOfSpace
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
        summary: Filesystem has less than 5% space left.
      expr: |
        (
@@ -60,6 +63,7 @@ spec:
    - alert: NodeFilesystemAlmostOutOfSpace
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutofspace
        summary: Filesystem has less than 3% space left.
      expr: |
        (
@@ -73,6 +77,7 @@ spec:
    - alert: NodeFilesystemFilesFillingUp
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
        summary: Filesystem is predicted to run out of inodes within the next 24 hours.
      expr: |
        (
@@ -88,6 +93,7 @@ spec:
    - alert: NodeFilesystemFilesFillingUp
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemfilesfillingup
        summary: Filesystem is predicted to run out of inodes within the next 4 hours.
      expr: |
        (
@@ -103,6 +109,7 @@ spec:
    - alert: NodeFilesystemAlmostOutOfFiles
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
        summary: Filesystem has less than 5% inodes left.
      expr: |
        (
@@ -116,6 +123,7 @@ spec:
    - alert: NodeFilesystemAlmostOutOfFiles
      annotations:
        description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodefilesystemalmostoutoffiles
        summary: Filesystem has less than 3% inodes left.
      expr: |
        (
@@ -129,6 +137,7 @@ spec:
    - alert: NodeNetworkReceiveErrs
      annotations:
        description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} receive errors in the last two minutes.'
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworkreceiveerrs
        summary: Network interface is reporting many receive errors.
      expr: |
        rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m]) > 0.01
@@ -138,6 +147,7 @@ spec:
    - alert: NodeNetworkTransmitErrs
      annotations:
        description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodenetworktransmiterrs
        summary: Network interface is reporting many transmit errors.
      expr: |
        rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m]) > 0.01
@@ -147,6 +157,7 @@ spec:
    - alert: NodeHighNumberConntrackEntriesUsed
      annotations:
        description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodehighnumberconntrackentriesused
        summary: Number of conntrack are getting close to the limit.
      expr: |
        (node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
@@ -155,6 +166,7 @@ spec:
    - alert: NodeTextFileCollectorScrapeError
      annotations:
        description: Node Exporter text file collector failed to scrape.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodetextfilecollectorscrapeerror
        summary: Node Exporter text file collector failed to scrape.
      expr: |
        node_textfile_scrape_error{job="node-exporter"} == 1
@@ -163,6 +175,7 @@ spec:
    - alert: NodeClockSkewDetected
      annotations:
        message: Clock on {{ $labels.instance }} is out of sync by more than 300s. Ensure NTP is configured correctly on this host.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclockskewdetected
        summary: Clock skew detected.
      expr: |
        (
@@ -182,6 +195,7 @@ spec:
    - alert: NodeClockNotSynchronising
      annotations:
        message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/nodeclocknotsynchronising
        summary: Clock not synchronising.
      expr: |
        min_over_time(node_timex_sync_status[5m]) == 0
@@ -193,6 +207,7 @@ spec:
    - alert: NodeRAIDDegraded
      annotations:
        description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddegraded
        summary: RAID Array is degraded
      expr: |
        node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
@@ -202,6 +217,7 @@ spec:
    - alert: NodeRAIDDiskFailure
      annotations:
        description: At least one device in RAID array on {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
+        runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/noderaiddiskfailure
        summary: Failed device in RAID array
      expr: |
        node_md_disks{state="fail"} > 0