Use absolut jsonnet imports whenever possible
There are still some dependencies that we need to make work to fully deactivate the legacyImports in the future. I'll start opening PRs against those other repositories.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/etcd",
|
||||
"remote": "https://github.com/etcd-io/etcd",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
prometheus+:: {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local statefulSet = k.apps.v1.statefulSet;
|
||||
local affinity = statefulSet.mixin.spec.template.spec.affinity.podAntiAffinity.preferredDuringSchedulingIgnoredDuringExecutionType;
|
||||
local matchExpression = affinity.mixin.podAffinityTerm.labelSelector.matchExpressionsType;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
// Custom metrics API allows the HPA v2 to scale based on arbirary metrics.
|
||||
// For more details on usage visit https://github.com/DirectXMan12/k8s-prometheus-adapter#quick-links
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local kp = (import 'kube-prometheus/kube-prometheus.libsonnet');
|
||||
local kp = (import './kube-prometheus/kube-prometheus.libsonnet');
|
||||
|
||||
{ ['0prometheus-operator-' + name]: kp.prometheusOperator[name] for name in std.objectFields(kp.prometheusOperator) } +
|
||||
{ ['node-exporter-' + name]: kp.nodeExporter[name] for name in std.objectFields(kp.nodeExporter) } +
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
(import 'etcd-mixin/mixin.libsonnet') + {
|
||||
(import 'github.com/etcd-io/etcd/Documentation/etcd-mixin/mixin.libsonnet') + {
|
||||
_config+:: {
|
||||
etcd: {
|
||||
ips: [],
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local service = k.core.v1.service;
|
||||
local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
|
||||
@@ -183,7 +183,7 @@ local servicePort = k.core.v1.service.mixin.spec.portsType;
|
||||
],
|
||||
},
|
||||
grafanaDashboards+:: {
|
||||
'weave-net.json': (import 'grafana-weave-net.json'),
|
||||
'weave-net-cluster.json': (import 'grafana-weave-net-cluster.json'),
|
||||
'weave-net.json': (import './grafana-weave-net.json'),
|
||||
'weave-net-cluster.json': (import './grafana-weave-net-cluster.json'),
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k3 = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
|
||||
local configMapList = k3.core.v1.configMapList;
|
||||
|
||||
(import 'grafana/grafana.libsonnet') +
|
||||
(import 'kube-state-metrics/kube-state-metrics.libsonnet') +
|
||||
(import 'kube-state-metrics-mixin/mixin.libsonnet') +
|
||||
(import 'node-exporter/node-exporter.libsonnet') +
|
||||
(import 'node-mixin/mixin.libsonnet') +
|
||||
(import 'alertmanager/alertmanager.libsonnet') +
|
||||
(import 'prometheus-operator/prometheus-operator.libsonnet') +
|
||||
(import 'prometheus/prometheus.libsonnet') +
|
||||
(import 'prometheus-adapter/prometheus-adapter.libsonnet') +
|
||||
(import 'kubernetes-mixin/mixin.libsonnet') +
|
||||
(import 'prometheus/mixin.libsonnet') +
|
||||
(import 'alerts/alerts.libsonnet') +
|
||||
(import 'rules/rules.libsonnet') + {
|
||||
(import 'github.com/brancz/kubernetes-grafana/grafana/grafana.libsonnet') +
|
||||
(import './kube-state-metrics/kube-state-metrics.libsonnet') +
|
||||
(import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics-mixin/mixin.libsonnet') +
|
||||
(import './node-exporter/node-exporter.libsonnet') +
|
||||
(import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics-mixin/mixin.libsonnet') +
|
||||
(import './alertmanager/alertmanager.libsonnet') +
|
||||
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') +
|
||||
(import './prometheus/prometheus.libsonnet') +
|
||||
(import './prometheus-adapter/prometheus-adapter.libsonnet') +
|
||||
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') +
|
||||
(import 'github.com/prometheus/prometheus/documentation/prometheus-mixin/mixin.libsonnet') +
|
||||
(import './alerts/alerts.libsonnet') +
|
||||
(import './rules/rules.libsonnet') + {
|
||||
kubePrometheus+:: {
|
||||
namespace: k.core.v1.namespace.new($._config.namespace),
|
||||
},
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
local deployment = k.apps.v1.deployment;
|
||||
local container = deployment.mixin.spec.template.spec.containersType;
|
||||
local containerPort = container.portsType;
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
scrapeTimeout: '30s',
|
||||
},
|
||||
},
|
||||
kubeStateMetrics+:: (import 'kube-state-metrics/kube-state-metrics.libsonnet') +
|
||||
kubeStateMetrics+:: (import 'github.com/kubernetes/kube-state-metrics/jsonnet/kube-state-metrics/kube-state-metrics.libsonnet') +
|
||||
{
|
||||
local ksm = self,
|
||||
name:: 'kube-state-metrics',
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
local k3 = import 'ksonnet/ksonnet.beta.3/k.libsonnet';
|
||||
local k = import 'ksonnet/ksonnet.beta.4/k.libsonnet';
|
||||
local k3 = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.3/k.libsonnet';
|
||||
local k = import 'github.com/ksonnet/ksonnet-lib/ksonnet.beta.4/k.libsonnet';
|
||||
|
||||
{
|
||||
_config+:: {
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
{
|
||||
"source": {
|
||||
"git": {
|
||||
"remote": "https://github.com/coreos/etcd.git",
|
||||
"remote": "https://github.com/etcd-io/etcd.git",
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -81,15 +81,6 @@ spec:
|
||||
- mountPath: /grafana-dashboard-definitions/0/namespace-by-workload
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/node-rsrc-use
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/nodes
|
||||
name: grafana-dashboard-nodes
|
||||
readOnly: false
|
||||
- mountPath: /grafana-dashboard-definitions/0/persistentvolumesusage
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
readOnly: false
|
||||
@@ -166,15 +157,6 @@ spec:
|
||||
- configMap:
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
- configMap:
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
- configMap:
|
||||
name: grafana-dashboard-nodes
|
||||
name: grafana-dashboard-nodes
|
||||
- configMap:
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
|
||||
@@ -8,63 +8,6 @@ metadata:
|
||||
namespace: monitoring
|
||||
spec:
|
||||
groups:
|
||||
- name: node-exporter.rules
|
||||
rules:
|
||||
- expr: |
|
||||
count without (cpu) (
|
||||
count without (mode) (
|
||||
node_cpu_seconds_total{job="node-exporter"}
|
||||
)
|
||||
)
|
||||
record: instance:node_num_cpu:sum
|
||||
- expr: |
|
||||
1 - avg without (cpu, mode) (
|
||||
rate(node_cpu_seconds_total{job="node-exporter", mode="idle"}[1m])
|
||||
)
|
||||
record: instance:node_cpu_utilisation:rate1m
|
||||
- expr: |
|
||||
(
|
||||
node_load1{job="node-exporter"}
|
||||
/
|
||||
instance:node_num_cpu:sum{job="node-exporter"}
|
||||
)
|
||||
record: instance:node_load1_per_cpu:ratio
|
||||
- expr: |
|
||||
1 - (
|
||||
node_memory_MemAvailable_bytes{job="node-exporter"}
|
||||
/
|
||||
node_memory_MemTotal_bytes{job="node-exporter"}
|
||||
)
|
||||
record: instance:node_memory_utilisation:ratio
|
||||
- expr: |
|
||||
rate(node_vmstat_pgmajfault{job="node-exporter"}[1m])
|
||||
record: instance:node_vmstat_pgmajfault:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_seconds:rate1m
|
||||
- expr: |
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+"}[1m])
|
||||
record: instance_device:node_disk_io_time_weighted_seconds:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_receive_bytes_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_bytes_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_receive_drop_excluding_lo:rate1m
|
||||
- expr: |
|
||||
sum without (device) (
|
||||
rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[1m])
|
||||
)
|
||||
record: instance:node_network_transmit_drop_excluding_lo:rate1m
|
||||
- name: kube-apiserver.rules
|
||||
rules:
|
||||
- expr: |
|
||||
@@ -816,234 +759,38 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: node-exporter
|
||||
- name: kube-state-metrics
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
- alert: KubeStateMetricsListErrors
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
description: kube-state-metrics is experiencing errors at an elevated rate
|
||||
in list operations. This is likely causing it to not be able to expose metrics
|
||||
about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricslisterrors
|
||||
summary: kube-state-metrics is experiencing errors in list operations.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 15
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available space left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left and is filling
|
||||
up fast.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 20
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }} at {{ $labels.instance }}
|
||||
has only {{ printf "%.2f" $value }}% available inodes left.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!=""} / node_filesystem_files{job="node-exporter",fstype!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: |
|
||||
increase(node_network_receive_errs_total[2m]) > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: |
|
||||
increase(node_network_transmit_errs_total[2m]) > 10
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: |
|
||||
(node_nf_conntrack_entries / node_nf_conntrack_entries_limit) > 0.75
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeTextFileCollectorScrapeError
|
||||
annotations:
|
||||
description: Node Exporter text file collector failed to scrape.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodetextfilecollectorscrapeerror
|
||||
summary: Node Exporter text file collector failed to scrape.
|
||||
expr: |
|
||||
node_textfile_scrape_error{job="node-exporter"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is out of sync by more than 300s.
|
||||
Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |
|
||||
(
|
||||
node_timex_offset_seconds > 0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds[5m]) >= 0
|
||||
)
|
||||
or
|
||||
(
|
||||
node_timex_offset_seconds < -0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds[5m]) <= 0
|
||||
)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockNotSynchronising
|
||||
annotations:
|
||||
message: Clock on {{ $labels.instance }} is not synchronising. Ensure NTP
|
||||
is configured on this host.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |
|
||||
min_over_time(node_timex_sync_status[5m]) == 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeRAIDDegraded
|
||||
annotations:
|
||||
description: RAID array '{{ $labels.device }}' on {{ $labels.instance }} is
|
||||
in degraded state due to one or more disks failures. Number of spare drives
|
||||
is insufficient to fix issue automatically.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddegraded
|
||||
summary: RAID Array is degraded
|
||||
expr: |
|
||||
node_md_disks_required - ignoring (state) (node_md_disks{state="active"}) > 0
|
||||
(sum(rate(kube_state_metrics_list_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
/
|
||||
sum(rate(kube_state_metrics_list_total{job="kube-state-metrics"}[5m])))
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeRAIDDiskFailure
|
||||
- alert: KubeStateMetricsWatchErrors
|
||||
annotations:
|
||||
description: At least one device in RAID array on {{ $labels.instance }} failed.
|
||||
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-noderaiddiskfailure
|
||||
summary: Failed device in RAID array
|
||||
description: kube-state-metrics is experiencing errors at an elevated rate
|
||||
in watch operations. This is likely causing it to not be able to expose
|
||||
metrics about Kubernetes objects correctly or at all.
|
||||
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatemetricswatcherrors
|
||||
summary: kube-state-metrics is experiencing errors in watch operations.
|
||||
expr: |
|
||||
node_md_disks{state="fail"} > 0
|
||||
(sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics",result="error"}[5m]))
|
||||
/
|
||||
sum(rate(kube_state_metrics_watch_total{job="kube-state-metrics"}[5m])))
|
||||
> 0.01
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
severity: critical
|
||||
- name: kubernetes-apps
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
|
||||
Reference in New Issue
Block a user