jsonnet: Use upstream prometheus-mixin

Frederic Branczyk
2019-07-11 15:21:23 +02:00
parent f0afafdb08
commit c8c850ef2b
6 changed files with 30 additions and 154 deletions

View File

@@ -1,5 +1,4 @@
(import 'alertmanager.libsonnet') +
(import 'general.libsonnet') +
(import 'node.libsonnet') +
(import 'prometheus.libsonnet') +
(import 'prometheus-operator.libsonnet')

View File

@@ -1,151 +0,0 @@
{
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'prometheus.rules',
        rules: [
          {
            alert: 'PrometheusConfigReloadFailed',
            annotations: {
              description: "Reloading Prometheus' configuration has failed for {{$labels.namespace}}/{{$labels.pod}}",
              summary: "Reloading Prometheus' configuration failed",
            },
            expr: |||
              prometheus_config_last_reload_successful{%(prometheusSelector)s} == 0
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusNotificationQueueRunningFull',
            annotations: {
              description: "Prometheus' alert notification queue is running full for {{$labels.namespace}}/{{ $labels.pod}}",
              summary: "Prometheus' alert notification queue is running full",
            },
            expr: |||
              predict_linear(prometheus_notifications_queue_length{%(prometheusSelector)s}[5m], 60 * 30) > prometheus_notifications_queue_capacity{%(prometheusSelector)s}
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusErrorSendingAlerts',
            annotations: {
              description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}',
              summary: 'Errors while sending alert from Prometheus',
            },
            expr: |||
              rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.01
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusErrorSendingAlerts',
            annotations: {
              description: 'Errors while sending alerts from Prometheus {{$labels.namespace}}/{{ $labels.pod}} to Alertmanager {{$labels.Alertmanager}}',
              summary: 'Errors while sending alerts from Prometheus',
            },
            expr: |||
              rate(prometheus_notifications_errors_total{%(prometheusSelector)s}[5m]) / rate(prometheus_notifications_sent_total{%(prometheusSelector)s}[5m]) > 0.03
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'critical',
            },
          },
          {
            alert: 'PrometheusNotConnectedToAlertmanagers',
            annotations: {
              description: 'Prometheus {{ $labels.namespace }}/{{ $labels.pod}} is not connected to any Alertmanagers',
              summary: 'Prometheus is not connected to any Alertmanagers',
            },
            expr: |||
              prometheus_notifications_alertmanagers_discovered{%(prometheusSelector)s} < 1
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusTSDBReloadsFailing',
            annotations: {
              description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} reload failures over the last four hours.',
              summary: 'Prometheus has issues reloading data blocks from disk',
            },
            expr: |||
              increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0
            ||| % $._config,
            'for': '12h',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusTSDBCompactionsFailing',
            annotations: {
              description: '{{$labels.job}} at {{$labels.instance}} had {{$value | humanize}} compaction failures over the last four hours.',
              summary: 'Prometheus has issues compacting sample blocks',
            },
            expr: |||
              increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0
            ||| % $._config,
            'for': '12h',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusTSDBWALCorruptions',
            annotations: {
              description: '{{$labels.job}} at {{$labels.instance}} has a corrupted write-ahead log (WAL).',
              summary: 'Prometheus write-ahead log is corrupted',
            },
            expr: |||
              prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0
            ||| % $._config,
            'for': '4h',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusNotIngestingSamples',
            annotations: {
              description: "Prometheus {{ $labels.namespace }}/{{ $labels.pod}} isn't ingesting samples.",
              summary: "Prometheus isn't ingesting samples",
            },
            expr: |||
              rate(prometheus_tsdb_head_samples_appended_total{%(prometheusSelector)s}[5m]) <= 0
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
          {
            alert: 'PrometheusTargetScrapesDuplicate',
            annotations: {
              description: '{{$labels.namespace}}/{{$labels.pod}} has many samples rejected due to duplicate timestamps but different values',
              summary: 'Prometheus has many samples rejected',
            },
            expr: |||
              increase(prometheus_target_scrapes_sample_duplicate_timestamp_total{%(prometheusSelector)s}[5m]) > 0
            ||| % $._config,
            'for': '10m',
            labels: {
              severity: 'warning',
            },
          },
        ],
      },
    ],
  },
}
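
The alerts deleted above are not lost: equivalent rules now come from the vendored prometheus-mixin. As a rough sketch (the file layout, selector value, and `-J vendor` flag are illustrative, and it assumes the mixin follows the usual monitoring-mixins convention of exposing a prometheusAlerts field), the upstream alerts can be rendered standalone with `jsonnet -J vendor`:

local mixin = (import 'prometheus/mixin.libsonnet') + {
  _config+:: {
    // Illustrative values; kube-prometheus wires these from its own $._config
    // (see the prometheusSelector/prometheusName lines further down).
    prometheusSelector: 'job="prometheus-k8s",namespace="monitoring"',
    prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
  },
};

// The upstream alert groups, ready to be embedded in a PrometheusRule object
// or dumped with std.manifestYamlDoc().
mixin.prometheusAlerts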

View File

@@ -49,6 +49,16 @@
                }
            },
            "version": "master"
        },
        {
            "name": "prometheus",
            "source": {
                "git": {
                    "remote": "https://github.com/prometheus/prometheus",
                    "subdir": "documentation/prometheus-mixin"
                }
            },
            "version": "master"
        }
    ]
}
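
jsonnet-bundler vendors this dependency under vendor/prometheus/ (the "name" field doubles as the import prefix), which is what lets `import 'prometheus/mixin.libsonnet'` in the next file resolve. The commit tracks `master`; pinning to a release branch or commit is only a matter of changing `version`, for example (branch name illustrative):

{
    "name": "prometheus",
    "source": {
        "git": {
            "remote": "https://github.com/prometheus/prometheus",
            "subdir": "documentation/prometheus-mixin"
        }
    },
    "version": "release-2.11"
}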

View File

@@ -10,6 +10,7 @@ local configMapList = k3.core.v1.configMapList;
(import 'prometheus/prometheus.libsonnet') +
(import 'prometheus-adapter/prometheus-adapter.libsonnet') +
(import 'kubernetes-mixin/mixin.libsonnet') +
(import 'prometheus/mixin.libsonnet') +
(import 'alerts/alerts.libsonnet') +
(import 'rules/rules.libsonnet') + {
  kubePrometheus+:: {
@@ -89,6 +90,7 @@ local configMapList = k3.core.v1.configMapList;
    alertmanagerSelector: 'job="alertmanager-main",namespace="' + $._config.namespace + '"',
    prometheusSelector: 'job="prometheus-' + $._config.prometheus.name + '",namespace="' + $._config.namespace + '"',
    prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
    prometheusOperatorSelector: 'job="prometheus-operator",namespace="' + $._config.namespace + '"',
    jobs: {
@@ -111,5 +113,6 @@ local configMapList = k3.core.v1.configMapList;
    grafana+:: {
      dashboards: $.grafanaDashboards,
    },
  },
}
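
For consumers of kube-prometheus the wiring stays the same: the upstream group is merged into $.prometheusAlerts next to the remaining local ones, and the new prometheusName entry can be overridden like any other _config field. A minimal sketch (namespace and override values are illustrative):

local kp = (import 'kube-prometheus/kube-prometheus.libsonnet') + {
  _config+:: {
    namespace: 'monitoring',
    // How a Prometheus instance is referred to in the upstream alert text.
    prometheusName: '{{$labels.namespace}}/{{$labels.pod}}',
  },
};

// Names of all merged alert groups; the upstream prometheus-mixin group now
// takes the place of the deleted local 'prometheus.rules' group.
std.map(function(g) g.name, kp.prometheusAlerts.groups)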